Upload folder using huggingface_hub
Browse files
- .gitattributes +1 -0
- README.md +130 -0
- added_tokens.json +3 -0
- chat_template.jinja +47 -0
- config.json +107 -0
- generation_config.json +13 -0
- model-00001-of-00005.safetensors +3 -0
- model-00002-of-00005.safetensors +3 -0
- model-00003-of-00005.safetensors +3 -0
- model-00004-of-00005.safetensors +3 -0
- model-00005-of-00005.safetensors +3 -0
- model.safetensors.index.json +0 -0
- preprocessor_config.json +29 -0
- processor_config.json +4 -0
- runs/Nov24_00-10-21_jzxh071/events.out.tfevents.1763939522.jzxh071.3081979.0 +3 -0
- slurm.out +387 -0
- special_tokens_map.json +33 -0
- tokenizer.json +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
tags:
|
| 4 |
+
- generated_from_trainer
|
| 5 |
+
model-index:
|
| 6 |
+
- name: lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0
|
| 7 |
+
results: []
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 11 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 12 |
+
|
| 13 |
+
[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
|
| 14 |
+
<details><summary>See axolotl config</summary>
|
| 15 |
+
|
| 16 |
+
axolotl version: `0.12.2`
|
| 17 |
+
```yaml
|
| 18 |
+
base_model: /lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-12b
|
| 19 |
+
|
| 20 |
+
datasets:
|
| 21 |
+
- path: /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking
|
| 22 |
+
ds_type: json
|
| 23 |
+
type: chat_template
|
| 24 |
+
field_messages: conversations
|
| 25 |
+
data_files:
|
| 26 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0007.jsonl
|
| 27 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0009.jsonl
|
| 28 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0005.jsonl
|
| 29 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0006.jsonl
|
| 30 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0014.jsonl
|
| 31 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0010.jsonl
|
| 32 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0012.jsonl
|
| 33 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0008.jsonl
|
| 34 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0001.jsonl
|
| 35 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0002.jsonl
|
| 36 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0013.jsonl
|
| 37 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0015.jsonl
|
| 38 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0004.jsonl
|
| 39 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0011.jsonl
|
| 40 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0000.jsonl
|
| 41 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0003.jsonl
|
| 42 |
+
|
| 43 |
+
dataset_prepared_path: /lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0
|
| 44 |
+
tokenizer_config: "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-27b"
|
| 45 |
+
chat_template: gemma3
|
| 46 |
+
eot_tokens:
|
| 47 |
+
- "<end_of_turn>"
|
| 48 |
+
|
| 49 |
+
shuffle_merged_datasets: true
|
| 50 |
+
output_dir: /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0
|
| 51 |
+
|
| 52 |
+
sequence_len: 16384
|
| 53 |
+
sample_packing: true
|
| 54 |
+
|
| 55 |
+
gradient_accumulation_steps: 1
|
| 56 |
+
micro_batch_size: 1
|
| 57 |
+
num_epochs: 0.6
|
| 58 |
+
auto_resume_from_checkpoints: true
|
| 59 |
+
|
| 60 |
+
optimizer: adamw_torch_fused
|
| 61 |
+
lr_scheduler: warmup_stable_decay
|
| 62 |
+
learning_rate: 2e-6
|
| 63 |
+
lr_scheduler_kwargs:
|
| 64 |
+
num_decay_steps: 200
|
| 65 |
+
min_lr_ratio: 0.1
|
| 66 |
+
warmup_steps: 100
|
| 67 |
+
|
| 68 |
+
bf16: true
|
| 69 |
+
tf32: false
|
| 70 |
+
|
| 71 |
+
gradient_checkpointing: true
|
| 72 |
+
logging_steps: 10
|
| 73 |
+
flash_attention: true
|
| 74 |
+
|
| 75 |
+
evals_per_epoch: 0
|
| 76 |
+
saves_per_epoch: 1
|
| 77 |
+
save_total_limit: 20
|
| 78 |
+
save_only_model: true
|
| 79 |
+
|
| 80 |
+
use_tensorboard: true
|
| 81 |
+
deepspeed: /lustre/fswork/projects/rech/qwv/udv55np/axolotl/zero3.json
|
| 82 |
+
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
</details><br>
|
| 86 |
+
|
| 87 |
+
# lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0
|
| 88 |
+
|
| 89 |
+
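This model is a fine-tuned version of gemma-3-12b (loaded from a local checkpoint; see `base_model` in the axolotl config above), trained on the Nemotron-Super-49B-v1_5 `no_thinking` JSONL shards listed under `datasets` in that config.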
|
| 90 |
+
|
| 91 |
+
## Model description
|
| 92 |
+
|
| 93 |
+
More information needed
|
| 94 |
+
|
| 95 |
+
## Intended uses & limitations
|
| 96 |
+
|
| 97 |
+
More information needed
|
| 98 |
+
|
| 99 |
+
## Training and evaluation data
|
| 100 |
+
|
| 101 |
+
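Training data: the 16 JSONL shards (`0000.jsonl`–`0015.jsonl`) of the Nemotron-Super-49B-v1_5 `no_thinking` set listed in the axolotl config above, read as chat conversations (`field_messages: conversations`) and rendered with the `gemma3` chat template. No evaluation was run during training (`evals_per_epoch: 0`).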
|
| 102 |
+
|
| 103 |
+
## Training procedure
|
| 104 |
+
|
| 105 |
+
### Training hyperparameters
|
| 106 |
+
|
| 107 |
+
The following hyperparameters were used during training:
|
| 108 |
+
- learning_rate: 2e-06
|
| 109 |
+
- train_batch_size: 1
|
| 110 |
+
- eval_batch_size: 1
|
| 111 |
+
- seed: 42
|
| 112 |
+
- distributed_type: multi-GPU
|
| 113 |
+
- num_devices: 16
|
| 114 |
+
- total_train_batch_size: 16
|
| 115 |
+
- total_eval_batch_size: 16
|
| 116 |
+
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 117 |
+
- lr_scheduler_type: warmup_stable_decay
|
| 118 |
+
- lr_scheduler_warmup_steps: 100
|
| 119 |
+
- training_steps: 711
|
| 120 |
+
|
| 121 |
+
### Training results
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
### Framework versions
|
| 126 |
+
|
| 127 |
+
- Transformers 4.55.2
|
| 128 |
+
- Pytorch 2.6.0+cu124
|
| 129 |
+
- Datasets 4.0.0
|
| 130 |
+
- Tokenizers 0.21.1
|
added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144
|
| 3 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{ bos_token }}
|
| 2 |
+
{%- if messages[0]['role'] == 'system' -%}
|
| 3 |
+
{%- if messages[0]['content'] is string -%}
|
| 4 |
+
{%- set first_user_prefix = messages[0]['content'] + '
|
| 5 |
+
|
| 6 |
+
' -%}
|
| 7 |
+
{%- else -%}
|
| 8 |
+
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '
|
| 9 |
+
|
| 10 |
+
' -%}
|
| 11 |
+
{%- endif -%}
|
| 12 |
+
{%- set loop_messages = messages[1:] -%}
|
| 13 |
+
{%- else -%}
|
| 14 |
+
{%- set first_user_prefix = "" -%}
|
| 15 |
+
{%- set loop_messages = messages -%}
|
| 16 |
+
{%- endif -%}
|
| 17 |
+
{%- for message in loop_messages -%}
|
| 18 |
+
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
|
| 19 |
+
{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
|
| 20 |
+
{%- endif -%}
|
| 21 |
+
{%- if (message['role'] == 'assistant') -%}
|
| 22 |
+
{%- set role = "model" -%}
|
| 23 |
+
{%- else -%}
|
| 24 |
+
{%- set role = message['role'] -%}
|
| 25 |
+
{%- endif -%}
|
| 26 |
+
{{ '<start_of_turn>' + role + '
|
| 27 |
+
' + (first_user_prefix if loop.first else "") }}
|
| 28 |
+
{%- if message['content'] is string -%}
|
| 29 |
+
{{ message['content'] | trim }}
|
| 30 |
+
{%- elif message['content'] is iterable -%}
|
| 31 |
+
{%- for item in message['content'] -%}
|
| 32 |
+
{%- if item['type'] == 'image' -%}
|
| 33 |
+
{{ '<start_of_image>' }}
|
| 34 |
+
{%- elif item['type'] == 'text' -%}
|
| 35 |
+
{{ item['text'] | trim }}
|
| 36 |
+
{%- endif -%}
|
| 37 |
+
{%- endfor -%}
|
| 38 |
+
{%- else -%}
|
| 39 |
+
{{ raise_exception("Invalid content type") }}
|
| 40 |
+
{%- endif -%}
|
| 41 |
+
{{ '<end_of_turn>
|
| 42 |
+
' }}
|
| 43 |
+
{%- endfor -%}
|
| 44 |
+
{%- if add_generation_prompt -%}
|
| 45 |
+
{{'<start_of_turn>model
|
| 46 |
+
'}}
|
| 47 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Gemma3ForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"boi_token_index": 255999,
|
| 6 |
+
"eoi_token_index": 256000,
|
| 7 |
+
"image_token_index": 262144,
|
| 8 |
+
"initializer_range": 0.02,
|
| 9 |
+
"mm_tokens_per_image": 256,
|
| 10 |
+
"model_type": "gemma3",
|
| 11 |
+
"text_config": {
|
| 12 |
+
"_sliding_window_pattern": 6,
|
| 13 |
+
"attention_bias": false,
|
| 14 |
+
"attention_dropout": 0.0,
|
| 15 |
+
"attn_logit_softcapping": null,
|
| 16 |
+
"final_logit_softcapping": null,
|
| 17 |
+
"head_dim": 256,
|
| 18 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 19 |
+
"hidden_size": 3840,
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 15360,
|
| 22 |
+
"layer_types": [
|
| 23 |
+
"sliding_attention",
|
| 24 |
+
"sliding_attention",
|
| 25 |
+
"sliding_attention",
|
| 26 |
+
"sliding_attention",
|
| 27 |
+
"sliding_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"sliding_attention",
|
| 30 |
+
"sliding_attention",
|
| 31 |
+
"sliding_attention",
|
| 32 |
+
"sliding_attention",
|
| 33 |
+
"sliding_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"sliding_attention",
|
| 36 |
+
"sliding_attention",
|
| 37 |
+
"sliding_attention",
|
| 38 |
+
"sliding_attention",
|
| 39 |
+
"sliding_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"sliding_attention",
|
| 42 |
+
"sliding_attention",
|
| 43 |
+
"sliding_attention",
|
| 44 |
+
"sliding_attention",
|
| 45 |
+
"sliding_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"sliding_attention",
|
| 48 |
+
"sliding_attention",
|
| 49 |
+
"sliding_attention",
|
| 50 |
+
"sliding_attention",
|
| 51 |
+
"sliding_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"sliding_attention",
|
| 54 |
+
"sliding_attention",
|
| 55 |
+
"sliding_attention",
|
| 56 |
+
"sliding_attention",
|
| 57 |
+
"sliding_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"sliding_attention",
|
| 60 |
+
"sliding_attention",
|
| 61 |
+
"sliding_attention",
|
| 62 |
+
"sliding_attention",
|
| 63 |
+
"sliding_attention",
|
| 64 |
+
"full_attention",
|
| 65 |
+
"sliding_attention",
|
| 66 |
+
"sliding_attention",
|
| 67 |
+
"sliding_attention",
|
| 68 |
+
"sliding_attention",
|
| 69 |
+
"sliding_attention",
|
| 70 |
+
"full_attention"
|
| 71 |
+
],
|
| 72 |
+
"max_position_embeddings": 131072,
|
| 73 |
+
"model_type": "gemma3_text",
|
| 74 |
+
"num_attention_heads": 16,
|
| 75 |
+
"num_hidden_layers": 48,
|
| 76 |
+
"num_key_value_heads": 8,
|
| 77 |
+
"query_pre_attn_scalar": 256,
|
| 78 |
+
"rms_norm_eps": 1e-06,
|
| 79 |
+
"rope_local_base_freq": 10000.0,
|
| 80 |
+
"rope_scaling": {
|
| 81 |
+
"factor": 8.0,
|
| 82 |
+
"rope_type": "linear"
|
| 83 |
+
},
|
| 84 |
+
"rope_theta": 1000000.0,
|
| 85 |
+
"sliding_window": 1024,
|
| 86 |
+
"torch_dtype": "bfloat16",
|
| 87 |
+
"use_cache": false,
|
| 88 |
+
"vocab_size": 262208
|
| 89 |
+
},
|
| 90 |
+
"torch_dtype": "bfloat16",
|
| 91 |
+
"transformers_version": "4.55.2",
|
| 92 |
+
"vision_config": {
|
| 93 |
+
"attention_dropout": 0.0,
|
| 94 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 95 |
+
"hidden_size": 1152,
|
| 96 |
+
"image_size": 896,
|
| 97 |
+
"intermediate_size": 4304,
|
| 98 |
+
"layer_norm_eps": 1e-06,
|
| 99 |
+
"model_type": "siglip_vision_model",
|
| 100 |
+
"num_attention_heads": 16,
|
| 101 |
+
"num_channels": 3,
|
| 102 |
+
"num_hidden_layers": 27,
|
| 103 |
+
"patch_size": 14,
|
| 104 |
+
"torch_dtype": "bfloat16",
|
| 105 |
+
"vision_use_head": false
|
| 106 |
+
}
|
| 107 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 2,
|
| 3 |
+
"cache_implementation": "hybrid",
|
| 4 |
+
"do_sample": true,
|
| 5 |
+
"eos_token_id": [
|
| 6 |
+
1,
|
| 7 |
+
106
|
| 8 |
+
],
|
| 9 |
+
"pad_token_id": 0,
|
| 10 |
+
"top_k": 64,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"transformers_version": "4.55.2"
|
| 13 |
+
}
|
model-00001-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48713f75a388bf872e79b3cdf174382aa5b2969c2681393afca5e2d6d8a14763
|
| 3 |
+
size 4979902192
|
model-00002-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a420cf54d4f15c12aec81b16c493493710751552f296f122a04765066848758
|
| 3 |
+
size 4931296592
|
model-00003-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac5e315f04c8f2e2fc82758ab450578958518e2b2fb4ec1743775b5f4d6a6683
|
| 3 |
+
size 4931296656
|
model-00004-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a851bc5d8dfee5bd6a242188d006af820dca1c9775340bb3e543fd5b466041f
|
| 3 |
+
size 4931296656
|
model-00005-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d2e99d57cd426e6d1e5004830b646ddaf5f52c50fdb117706ffa11356ccb63a
|
| 3 |
+
size 4601000928
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": null,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_pan_and_scan": null,
|
| 5 |
+
"do_rescale": true,
|
| 6 |
+
"do_resize": true,
|
| 7 |
+
"image_mean": [
|
| 8 |
+
0.5,
|
| 9 |
+
0.5,
|
| 10 |
+
0.5
|
| 11 |
+
],
|
| 12 |
+
"image_processor_type": "Gemma3ImageProcessor",
|
| 13 |
+
"image_seq_length": 256,
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.5,
|
| 16 |
+
0.5,
|
| 17 |
+
0.5
|
| 18 |
+
],
|
| 19 |
+
"pan_and_scan_max_num_crops": null,
|
| 20 |
+
"pan_and_scan_min_crop_size": null,
|
| 21 |
+
"pan_and_scan_min_ratio_to_activate": null,
|
| 22 |
+
"processor_class": "Gemma3Processor",
|
| 23 |
+
"resample": 2,
|
| 24 |
+
"rescale_factor": 0.00392156862745098,
|
| 25 |
+
"size": {
|
| 26 |
+
"height": 896,
|
| 27 |
+
"width": 896
|
| 28 |
+
}
|
| 29 |
+
}
|
processor_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_seq_length": 256,
|
| 3 |
+
"processor_class": "Gemma3Processor"
|
| 4 |
+
}
|
runs/Nov24_00-10-21_jzxh071/events.out.tfevents.1763939522.jzxh071.3081979.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f37098112ad379ad01786305414c07051d2d994842774ecf72b33167987104ed
|
| 3 |
+
size 42188
|
slurm.out
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/711 [00:00<?, ?it/s]
|
| 1 |
0%| | 1/711 [03:14<38:20:16, 194.39s/it]
|
| 2 |
0%| | 2/711 [03:19<16:22:13, 83.12s/it]
|
| 3 |
0%| | 3/711 [03:24<9:20:24, 47.49s/it]
|
| 4 |
1%| | 4/711 [03:29<6:02:21, 30.75s/it]
|
| 5 |
1%| | 5/711 [03:35<4:14:24, 21.62s/it]
|
| 6 |
1%| | 6/711 [03:40<3:09:35, 16.14s/it]
|
| 7 |
1%| | 7/711 [03:45<2:27:08, 12.54s/it]
|
| 8 |
1%| | 8/711 [03:50<1:59:06, 10.17s/it]
|
| 9 |
1%|▏ | 9/711 [03:56<1:40:22, 8.58s/it]
|
| 10 |
1%|▏ | 10/711 [04:01<1:27:41, 7.51s/it]
|
| 11 |
|
| 12 |
1%|▏ | 10/711 [04:01<1:27:41, 7.51s/it]
|
| 13 |
2%|▏ | 11/711 [04:06<1:18:52, 6.76s/it]
|
| 14 |
2%|▏ | 12/711 [04:11<1:12:59, 6.27s/it]
|
| 15 |
2%|▏ | 13/711 [04:16<1:08:45, 5.91s/it]
|
| 16 |
2%|▏ | 14/711 [04:21<1:06:01, 5.68s/it]
|
| 17 |
2%|▏ | 15/711 [04:26<1:03:51, 5.51s/it]
|
| 18 |
2%|▏ | 16/711 [04:31<1:02:15, 5.38s/it]
|
| 19 |
2%|▏ | 17/711 [04:37<1:01:45, 5.34s/it]
|
|
|
|
|
|
|
|
|
|
| 20 |
3%|▎ | 18/711 [04:42<1:01:12, 5.30s/it]
|
| 21 |
3%|▎ | 19/711 [04:47<1:00:37, 5.26s/it]
|
| 22 |
3%|▎ | 20/711 [04:52<1:00:10, 5.23s/it]
|
| 23 |
|
| 24 |
3%|▎ | 20/711 [04:52<1:00:10, 5.23s/it]
|
| 25 |
3%|▎ | 21/711 [04:57<59:44, 5.19s/it]
|
| 26 |
3%|▎ | 22/711 [05:02<59:22, 5.17s/it]
|
| 27 |
3%|▎ | 23/711 [05:07<59:00, 5.15s/it]
|
| 28 |
3%|▎ | 24/711 [05:13<58:57, 5.15s/it]
|
| 29 |
4%|▎ | 25/711 [05:18<59:09, 5.17s/it]
|
| 30 |
4%|▎ | 26/711 [05:23<58:51, 5.16s/it]
|
| 31 |
4%|▍ | 27/711 [05:28<58:27, 5.13s/it]
|
| 32 |
4%|▍ | 28/711 [05:33<58:17, 5.12s/it]
|
| 33 |
4%|▍ | 29/711 [05:38<58:07, 5.11s/it]
|
| 34 |
4%|▍ | 30/711 [05:44<59:10, 5.21s/it]
|
| 35 |
|
| 36 |
4%|▍ | 30/711 [05:44<59:10, 5.21s/it]
|
| 37 |
4%|▍ | 31/711 [05:49<58:50, 5.19s/it]
|
| 38 |
5%|▍ | 32/711 [05:54<58:36, 5.18s/it]
|
| 39 |
5%|▍ | 33/711 [05:59<59:31, 5
|
|
|
|
|
|
|
| 40 |
5%|▍ | 34/711 [06:04<58:50, 5.22s/it]
|
| 41 |
5%|▍ | 35/711 [06:10<58:21, 5.18s/it]
|
| 42 |
5%|▌ | 36/711 [06:15<57:59, 5.16s/it]
|
| 43 |
5%|▌ | 37/711 [06:20<57:35, 5.13s/it]
|
| 44 |
5%|▌ | 38/711 [06:25<57:22, 5.12s/it]
|
| 45 |
5%|▌ | 39/711 [06:30<57:16, 5.11s/it]
|
| 46 |
6%|▌ | 40/711 [06:35<57:20, 5.13s/it]
|
| 47 |
|
| 48 |
6%|▌ | 40/711 [06:35<57:20, 5.13s/it]
|
| 49 |
6%|▌ | 41/711 [06:40<57:23, 5.14s/it]
|
| 50 |
6%|▌ | 42/711 [06:45<57:09, 5.13s/it]
|
| 51 |
6%|▌ | 43/711 [06:51<57:31, 5.17s/it]
|
| 52 |
6%|▌ | 44/711 [06:56<57:25, 5.17s/it]
|
| 53 |
6%|▋ | 45/711 [07:01<57:08, 5.15s/it]
|
| 54 |
6%|▋ | 46/711 [07:06<57:01, 5.15s/it]
|
| 55 |
7%|▋ | 47/711 [07:11<56:53, 5.14s/it]
|
| 56 |
7%|▋ | 48/711 [07:16<57:06, 5.17s/it]
|
| 57 |
7%|▋ | 49/711 [07:22<57:03, 5.17s/it]
|
| 58 |
7%|▋ | 50/711 [07:27<57:26, 5.21s/it]
|
| 59 |
|
|
|
|
|
|
|
|
|
|
| 60 |
7%|▋ | 50/711 [07:27<57:26, 5.21s/it]
|
| 61 |
7%|▋ | 51/711 [07:32<57:06, 5.19s/it]
|
| 62 |
7%|▋ | 52/711 [07:37<57:02, 5.19s/it]
|
| 63 |
7%|▋ | 53/711 [07:43<57:29, 5.24s/it]
|
| 64 |
8%|▊ | 54/711 [07:48<58:04, 5.30s/it]
|
| 65 |
8%|▊ | 55/711 [07:53<57:20, 5.25s/it]
|
| 66 |
8%|▊ | 56/711 [07:59<57:54, 5.30s/it]
|
| 67 |
8%|▊ | 57/711 [08:04<57:10, 5.25s/it]
|
| 68 |
8%|▊ | 58/711 [08:09<57:55, 5.32s/it]
|
| 69 |
8%|▊ | 59/711 [08:14<57:11, 5.26s/it]
|
| 70 |
8%|▊ | 60/711 [08:19<56:30, 5.21s/it]
|
| 71 |
|
| 72 |
8%|▊ | 60/711 [08:19<56:30, 5.21s/it]
|
| 73 |
9%|▊ | 61/711 [08:25<56:35, 5.22s/it]
|
| 74 |
9%|▊ | 62/711 [08:30<56:18, 5.21s/it]
|
| 75 |
9%|▉ | 63/711 [08:35<57:08, 5.29s/it]
|
| 76 |
9%|▉ | 64/711 [08:41<56:59, 5.28s/it]
|
| 77 |
9%|▉ | 65/711 [08:46<56:22, 5.24s/it]
|
| 78 |
9%|▉ | 66/711 [08:51<56:36, 5.27s/it]
|
| 79 |
9%|▉ | 67/711 [08:56<56:06, 5.23s/it]
|
| 80 |
1
|
|
|
|
|
|
|
|
|
|
| 81 |
10%|▉ | 69/711 [09:06<55:12, 5.16s/it]
|
| 82 |
10%|▉ | 70/711 [09:12<55:17, 5.18s/it]
|
| 83 |
|
| 84 |
10%|▉ | 70/711 [09:12<55:17, 5.18s/it]
|
| 85 |
10%|▉ | 71/711 [09:17<54:55, 5.15s/it]
|
| 86 |
10%|█ | 72/711 [09:22<54:39, 5.13s/it]
|
| 87 |
10%|█ | 73/711 [09:27<54:25, 5.12s/it]
|
| 88 |
10%|█ | 74/711 [09:32<54:18, 5.12s/it]
|
| 89 |
11%|█ | 75/711 [09:37<54:05, 5.10s/it]
|
| 90 |
11%|█ | 76/711 [09:42<54:01, 5.11s/it]
|
| 91 |
11%|█ | 77/711 [09:47<53:56, 5.11s/it]
|
| 92 |
11%|█ | 78/711 [09:52<54:06, 5.13s/it]
|
| 93 |
11%|█ | 79/711 [09:57<53:56, 5.12s/it]
|
| 94 |
11%|█▏ | 80/711 [10:03<54:04, 5.14s/it]
|
| 95 |
|
| 96 |
11%|█▏ | 80/711 [10:03<54:04, 5.14s/it]
|
| 97 |
11%|█▏ | 81/711 [10:08<55:07, 5.25s/it]
|
| 98 |
12%|█▏ | 82/711 [10:13<54:50, 5.23s/it]
|
| 99 |
12%|█▏ | 83/711 [10:19<55:29, 5.30s/it]
|
|
|
|
|
|
|
| 100 |
12%|█▏ | 84/711 [10:24<54:57, 5.26s/it]
|
| 101 |
12%|█▏ | 85/711 [10:29<54:21, 5.21s/it]
|
| 102 |
12%|█▏ | 86/711 [10:35<55:05, 5.29s/it]
|
| 103 |
12%|█▏ | 87/711 [10:40<54:33, 5.25s/it]
|
| 104 |
12%|█▏ | 88/711 [10:45<54:02, 5.20s/it]
|
| 105 |
13%|█▎ | 89/711 [10:50<53:35, 5.17s/it]
|
| 106 |
13%|█▎ | 90/711 [10:55<53:14, 5.14s/it]
|
| 107 |
|
| 108 |
13%|█▎ | 90/711 [10:55<53:14, 5.14s/it]
|
| 109 |
13%|█▎ | 91/711 [11:00<52:59, 5.13s/it]
|
| 110 |
13%|█▎ | 92/711 [11:05<53:42, 5.21s/it]
|
| 111 |
13%|█▎ | 93/711 [11:11<54:32, 5.30s/it]
|
| 112 |
13%|█▎ | 94/711 [11:16<54:50, 5.33s/it]
|
| 113 |
13%|█▎ | 95/711 [11:22<55:36, 5.42s/it]
|
| 114 |
14%|█▎ | 96/711 [11:27<55:11, 5.38s/it]
|
| 115 |
14%|█▎ | 97/711 [11:32<54:13, 5.30s/it]
|
| 116 |
14%|█▍ | 98/711 [11:38<54:39, 5.35s/it]
|
| 117 |
14%|█▍ | 99/711 [11:43<53:58, 5.29s/it]
|
| 118 |
14%|█▍ | 100/711 [11:48<53:15, 5.23s/it]
|
| 119 |
|
|
|
|
|
|
|
|
|
|
| 120 |
14%|█▍ | 100/711 [11:48<53:15, 5.23s/it]
|
| 121 |
14%|█▍ | 101/711 [11:53<52:46, 5.19s/it]
|
| 122 |
14%|█▍ | 102/711 [11:58<52:21, 5.16s/it]
|
| 123 |
14%|█▍ | 103/711 [12:03<52:00, 5.13s/it]
|
| 124 |
15%|█▍ | 104/711 [12:08<51:44, 5.11s/it]
|
| 125 |
15%|█▍ | 105/711 [12:14<51:39, 5.11s/it]
|
| 126 |
15%|█▍ | 106/711 [12:19<51:43, 5.13s/it]
|
| 127 |
15%|█▌ | 107/711 [12:24<51:34, 5.12s/it]
|
| 128 |
15%|█▌ | 108/711 [12:29<51:22, 5.11s/it]
|
| 129 |
15%|█▌ | 109/711 [12:34<51:11, 5.10s/it]
|
| 130 |
15%|█▌ | 110/711 [12:39<51:14, 5.12s/it]
|
| 131 |
|
| 132 |
15%|█▌ | 110/711 [12:39<51:14, 5.12s/it]
|
| 133 |
16%|█▌ | 111/711 [12:44<51:43, 5.17s/it]
|
| 134 |
16%|█▌ | 112/711 [12:50<51:33, 5.16s/it]
|
| 135 |
16%|█▌ | 113/711 [12:55<52:08, 5.23s/it]
|
| 136 |
16%|█▌ | 114/711 [13:00<51:48, 5.21s/it]
|
| 137 |
16%|█▌ | 115/711 [13:05<51:23, 5.17s/it]
|
| 138 |
16%|█▋ |
|
|
|
|
|
|
|
|
|
|
| 139 |
16%|█▋ | 117/711 [13:16<51:03, 5.16s/it]
|
| 140 |
17%|█▋ | 118/711 [13:21<50:59, 5.16s/it]
|
| 141 |
17%|█▋ | 119/711 [13:26<51:20, 5.20s/it]
|
| 142 |
17%|█▋ | 120/711 [13:31<50:54, 5.17s/it]
|
| 143 |
|
| 144 |
17%|█▋ | 120/711 [13:31<50:54, 5.17s/it]
|
| 145 |
17%|█▋ | 121/711 [13:36<50:42, 5.16s/it]
|
| 146 |
17%|█▋ | 122/711 [13:41<50:37, 5.16s/it]
|
| 147 |
17%|█▋ | 123/711 [13:47<50:33, 5.16s/it]
|
| 148 |
17%|█▋ | 124/711 [13:52<50:44, 5.19s/it]
|
| 149 |
18%|█▊ | 125/711 [13:57<51:31, 5.28s/it]
|
| 150 |
18%|█▊ | 126/711 [14:03<52:11, 5.35s/it]
|
| 151 |
18%|█▊ | 127/711 [14:08<52:12, 5.36s/it]
|
| 152 |
18%|█▊ | 128/711 [14:14<52:10, 5.37s/it]
|
| 153 |
18%|█▊ | 129/711 [14:19<51:21, 5.29s/it]
|
| 154 |
18%|█▊ | 130/711 [14:24<50:50, 5.25s/it]
|
| 155 |
|
| 156 |
18%|█▊ | 130/711 [14:24<50:50, 5.25s/it]
|
| 157 |
18%|█▊ | 131/
|
|
|
|
|
|
|
| 158 |
19%|█▊ | 132/711 [14:34<50:04, 5.19s/it]
|
| 159 |
19%|█▊ | 133/711 [14:39<50:21, 5.23s/it]
|
| 160 |
19%|█▉ | 134/711 [14:45<49:58, 5.20s/it]
|
| 161 |
19%|█▉ | 135/711 [14:50<49:37, 5.17s/it]
|
| 162 |
19%|█▉ | 136/711 [14:55<49:54, 5.21s/it]
|
| 163 |
19%|█▉ | 137/711 [15:00<49:29, 5.17s/it]
|
| 164 |
19%|█▉ | 138/711 [15:05<49:14, 5.16s/it]
|
| 165 |
20%|█▉ | 139/711 [15:10<49:05, 5.15s/it]
|
| 166 |
20%|█▉ | 140/711 [15:16<49:19, 5.18s/it]
|
| 167 |
|
| 168 |
20%|█▉ | 140/711 [15:16<49:19, 5.18s/it]
|
| 169 |
20%|█▉ | 141/711 [15:21<49:04, 5.17s/it]
|
| 170 |
20%|█▉ | 142/711 [15:26<48:43, 5.14s/it]
|
| 171 |
20%|██ | 143/711 [15:31<48:31, 5.13s/it]
|
| 172 |
20%|██ | 144/711 [15:36<48:59, 5.19s/it]
|
| 173 |
20%|██ | 145/711 [15:41<48:54, 5.19s/it]
|
| 174 |
21%|██ | 146/711 [15:46<48:39, 5.17s/it]
|
| 175 |
21%|██ | 147/711 [15:52<48:22, 5.15s/it]
|
| 176 |
21%|██ | 148/7
|
|
|
|
|
|
|
|
|
|
| 177 |
21%|██ | 149/711 [16:02<48:06, 5.14s/it]
|
| 178 |
21%|██ | 150/711 [16:07<48:06, 5.14s/it]
|
| 179 |
|
| 180 |
21%|██ | 150/711 [16:07<48:06, 5.14s/it]
|
| 181 |
21%|██ | 151/711 [16:12<48:00, 5.14s/it]
|
| 182 |
21%|██▏ | 152/711 [16:17<48:35, 5.22s/it]
|
| 183 |
22%|██▏ | 153/711 [16:23<48:21, 5.20s/it]
|
| 184 |
22%|██▏ | 154/711 [16:28<48:00, 5.17s/it]
|
| 185 |
22%|██▏ | 155/711 [16:33<47:54, 5.17s/it]
|
| 186 |
22%|██▏ | 156/711 [16:38<47:42, 5.16s/it]
|
| 187 |
22%|██▏ | 157/711 [16:43<47:36, 5.16s/it]
|
| 188 |
22%|██▏ | 158/711 [16:48<47:27, 5.15s/it]
|
| 189 |
22%|██▏ | 159/711 [16:54<48:09, 5.23s/it]
|
| 190 |
23%|██▎ | 160/711 [16:59<47:40, 5.19s/it]
|
| 191 |
|
| 192 |
23%|██▎ | 160/711 [16:59<47:40, 5.19s/it]
|
| 193 |
23%|██▎ | 161/711 [17:04<47:17, 5.16s/it]
|
| 194 |
23%|██▎ | 162/711 [17:09<47:17, 5.17s/it]
|
| 195 |
23%|█
|
|
|
|
|
|
|
| 196 |
23%|██▎ | 164/711 [17:20<47:49, 5.25s/it]
|
| 197 |
23%|██▎ | 165/711 [17:25<47:18, 5.20s/it]
|
| 198 |
23%|██▎ | 166/711 [17:30<47:54, 5.27s/it]
|
| 199 |
23%|██▎ | 167/711 [17:35<47:21, 5.22s/it]
|
| 200 |
24%|██▎ | 168/711 [17:41<47:10, 5.21s/it]
|
| 201 |
24%|██▍ | 169/711 [17:46<46:57, 5.20s/it]
|
| 202 |
24%|██▍ | 170/711 [17:51<46:38, 5.17s/it]
|
| 203 |
|
| 204 |
24%|██▍ | 170/711 [17:51<46:38, 5.17s/it]
|
| 205 |
24%|██▍ | 171/711 [17:56<46:21, 5.15s/it]
|
| 206 |
24%|██▍ | 172/711 [18:01<46:17, 5.15s/it]
|
| 207 |
24%|██▍ | 173/711 [18:06<46:18, 5.16s/it]
|
| 208 |
24%|██▍ | 174/711 [18:11<46:01, 5.14s/it]
|
| 209 |
25%|██▍ | 175/711 [18:16<45:50, 5.13s/it]
|
| 210 |
25%|██▍ | 176/711 [18:22<45:49, 5.14s/it]
|
| 211 |
25%|██▍ | 177/711 [18:27<45:49, 5.15s/it]
|
| 212 |
25%|██▌ | 178/711 [18:32<46:45, 5.26s/it]
|
| 213 |
25%|██▌ | 179/
|
|
|
|
|
|
|
|
|
|
| 214 |
25%|██▌ | 180/711 [18:43<45:49, 5.18s/it]
|
| 215 |
|
| 216 |
25%|██▌ | 180/711 [18:43<45:49, 5.18s/it]
|
| 217 |
25%|██▌ | 181/711 [18:48<45:33, 5.16s/it]
|
| 218 |
26%|██▌ | 182/711 [18:53<45:22, 5.15s/it]
|
| 219 |
26%|██▌ | 183/711 [18:58<45:10, 5.13s/it]
|
| 220 |
26%|██▌ | 184/711 [19:03<45:33, 5.19s/it]
|
| 221 |
26%|██▌ | 185/711 [19:08<45:10, 5.15s/it]
|
| 222 |
26%|██▌ | 186/711 [19:13<44:56, 5.14s/it]
|
| 223 |
26%|██▋ | 187/711 [19:18<44:44, 5.12s/it]
|
| 224 |
26%|██▋ | 188/711 [19:24<45:35, 5.23s/it]
|
| 225 |
27%|██▋ | 189/711 [19:29<45:10, 5.19s/it]
|
| 226 |
27%|██▋ | 190/711 [19:34<44:59, 5.18s/it]
|
| 227 |
|
| 228 |
27%|██▋ | 190/711 [19:34<44:59, 5.18s/it]
|
| 229 |
27%|██▋ | 191/711 [19:39<44:41, 5.16s/it]
|
| 230 |
27%|██▋ | 192/711 [19:44<44:29, 5.14s/it]
|
| 231 |
27%|██▋ | 193/711 [19:50<44:25, 5.15s/it]
|
|
|
|
|
|
|
| 232 |
27%|██▋ | 194/711 [19:55<44:10, 5.13s/it]
|
| 233 |
27%|██▋ | 195/711 [20:00<44:25, 5.17s/it]
|
| 234 |
28%|██▊ | 196/711 [20:05<44:10, 5.15s/it]
|
| 235 |
28%|██▊ | 197/711 [20:10<44:27, 5.19s/it]
|
| 236 |
28%|██▊ | 198/711 [20:16<45:08, 5.28s/it]
|
| 237 |
28%|██▊ | 199/711 [20:21<44:34, 5.22s/it]
|
| 238 |
28%|██▊ | 200/711 [20:26<44:17, 5.20s/it]
|
| 239 |
|
| 240 |
28%|██▊ | 200/711 [20:26<44:17, 5.20s/it]
|
| 241 |
28%|██▊ | 201/711 [20:31<43:55, 5.17s/it]
|
| 242 |
28%|██▊ | 202/711 [20:36<43:35, 5.14s/it]
|
| 243 |
29%|██▊ | 203/711 [20:41<43:37, 5.15s/it]
|
| 244 |
29%|██▊ | 204/711 [20:47<44:14, 5.24s/it]
|
| 245 |
29%|██▉ | 205/711 [20:52<44:01, 5.22s/it]
|
| 246 |
29%|██▉ | 206/711 [20:57<43:43, 5.20s/it]
|
| 247 |
29%|██▉ | 207/711 [21:02<43:30, 5.18s/it]
|
| 248 |
29%|██▉ | 208/711 [21:07<43:14, 5.16s/it]
|
| 249 |
29%|██▉ | 209/711 [21:12<43:02, 5.15s/it]
|
| 250 |
30%|██▉
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
30%|██▉ | 210/711 [21:18<42:50, 5.13s/it]
|
| 253 |
30%|██▉ | 211/711 [21:23<42:40, 5.12s/it]
|
| 254 |
30%|██▉ | 212/711 [21:28<42:38, 5.13s/it]
|
| 255 |
30%|██▉ | 213/711 [21:33<42:38, 5.14s/it]
|
| 256 |
30%|███ | 214/711 [21:38<42:36, 5.14s/it]
|
| 257 |
30%|███ | 215/711 [21:43<42:24, 5.13s/it]
|
| 258 |
30%|███ | 216/711 [21:48<42:15, 5.12s/it]
|
| 259 |
31%|███ | 217/711 [21:53<42:11, 5.12s/it]
|
| 260 |
31%|███ | 218/711 [21:59<42:01, 5.12s/it]
|
| 261 |
31%|███ | 219/711 [22:04<42:01, 5.12s/it]
|
| 262 |
31%|███ | 220/711 [22:09<41:50, 5.11s/it]
|
| 263 |
|
| 264 |
31%|███ | 220/711 [22:09<41:50, 5.11s/it]
|
| 265 |
31%|███ | 221/711 [22:14<42:03, 5.15s/it]
|
| 266 |
31%|███ | 222/711 [22:19<42:36, 5.23s/it]
|
| 267 |
31%|███▏ | 223/711 [22:25<42:39, 5.25s/it]
|
| 268 |
32%|███▏ | 224/711 [22:30<42
|
|
|
|
|
|
|
| 269 |
32%|███▏ | 225/711 [22:35<42:13, 5.21s/it]
|
| 270 |
32%|███▏ | 226/711 [22:40<42:40, 5.28s/it]
|
| 271 |
32%|███▏ | 227/711 [22:46<42:11, 5.23s/it]
|
| 272 |
32%|███▏ | 228/711 [22:51<41:45, 5.19s/it]
|
| 273 |
32%|███▏ | 229/711 [22:56<41:33, 5.17s/it]
|
| 274 |
32%|███▏ | 230/711 [23:01<41:16, 5.15s/it]
|
| 275 |
|
| 276 |
32%|███▏ | 230/711 [23:01<41:16, 5.15s/it]
|
| 277 |
32%|███▏ | 231/711 [23:06<41:02, 5.13s/it]
|
| 278 |
33%|███▎ | 232/711 [23:11<40:52, 5.12s/it]
|
| 279 |
33%|███▎ | 233/711 [23:16<40:57, 5.14s/it]
|
| 280 |
33%|███▎ | 234/711 [23:21<40:58, 5.15s/it]
|
| 281 |
33%|███▎ | 235/711 [23:27<40:52, 5.15s/it]
|
| 282 |
33%|███▎ | 236/711 [23:32<40:41, 5.14s/it]
|
| 283 |
33%|███▎ | 237/711 [23:37<40:32, 5.13s/it]
|
| 284 |
33%|███▎ | 238/711 [23:42<40:21, 5.12s/it]
|
| 285 |
34%|███▎ | 239/711 [23:47<40:22, 5.13s/it]
|
| 286 |
34%|███▍ | 240/
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
34%|███▍ | 240/711 [23:52<40:14, 5.13s/it]
|
| 289 |
34%|███▍ | 241/711 [23:57<40:04, 5.12s/it]
|
| 290 |
34%|███▍ | 242/711 [24:03<40:30, 5.18s/it]
|
| 291 |
34%|███▍ | 243/711 [24:08<40:55, 5.25s/it]
|
| 292 |
34%|███▍ | 244/711 [24:13<40:38, 5.22s/it]
|
| 293 |
34%|███▍ | 245/711 [24:18<40:16, 5.18s/it]
|
| 294 |
35%|███▍ | 246/711 [24:23<40:06, 5.18s/it]
|
| 295 |
35%|███▍ | 247/711 [24:29<40:20, 5.22s/it]
|
| 296 |
35%|███▍ | 248/711 [24:34<39:58, 5.18s/it]
|
| 297 |
35%|███▌ | 249/711 [24:39<39:40, 5.15s/it]
|
| 298 |
35%|███▌ | 250/711 [24:44<39:34, 5.15s/it]
|
| 299 |
|
| 300 |
35%|███▌ | 250/711 [24:44<39:34, 5.15s/it]
|
| 301 |
35%|███▌ | 251/711 [24:49<39:23, 5.14s/it]
|
| 302 |
35%|███▌ | 252/711 [24:54<39:08, 5.12s/it]
|
| 303 |
36%|███▌ | 253/711 [24:59<39:13, 5.14s/it]
|
| 304 |
36%|███▌
|
|
|
|
|
|
|
| 305 |
36%|███▌ | 255/711 [25:10<39:07, 5.15s/it]
|
| 306 |
36%|███▌ | 256/711 [25:15<39:17, 5.18s/it]
|
| 307 |
36%|███▌ | 257/711 [25:20<38:57, 5.15s/it]
|
| 308 |
36%|███▋ | 258/711 [25:26<39:48, 5.27s/it]
|
| 309 |
36%|███▋ | 259/711 [25:31<39:17, 5.22s/it]
|
| 310 |
37%|███▋ | 260/711 [25:36<38:58, 5.19s/it]
|
| 311 |
|
| 312 |
37%|███▋ | 260/711 [25:36<38:58, 5.19s/it]
|
| 313 |
37%|███▋ | 261/711 [25:41<38:40, 5.16s/it]
|
| 314 |
37%|███▋ | 262/711 [25:46<38:26, 5.14s/it]
|
| 315 |
37%|███▋ | 263/711 [25:51<38:26, 5.15s/it]
|
| 316 |
37%|███▋ | 264/711 [25:56<38:28, 5.16s/it]
|
| 317 |
37%|███▋ | 265/711 [26:02<38:16, 5.15s/it]
|
| 318 |
37%|███▋ | 266/711 [26:07<38:12, 5.15s/it]
|
| 319 |
38%|███▊ | 267/711 [26:12<38:50, 5.25s/it]
|
| 320 |
38%|███▊ | 268/711 [26:18<39:17, 5.32s/it]
|
| 321 |
38%|███▊ | 269/711 [26:23<38:44, 5.26s/it]
|
| 322 |
38%|█�
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
38%|███▊ | 270/711 [26:28<38:17, 5.21s/it]
|
| 325 |
38%|███▊ | 271/711 [26:33<37:58, 5.18s/it]
|
| 326 |
38%|███▊ | 272/711 [26:38<37:42, 5.15s/it]
|
| 327 |
38%|███▊ | 273/711 [26:43<37:32, 5.14s/it]
|
| 328 |
39%|███▊ | 274/711 [26:49<39:58, 5.49s/it]
|
| 329 |
39%|███▊ | 275/711 [26:55<39:08, 5.39s/it]
|
| 330 |
39%|███▉ | 276/711 [27:00<38:28, 5.31s/it]
|
| 331 |
39%|███▉ | 277/711 [27:05<38:44, 5.36s/it]
|
| 332 |
39%|███▉ | 278/711 [27:10<38:16, 5.30s/it]
|
| 333 |
39%|███▉ | 279/711 [27:16<37:45, 5.24s/it]
|
| 334 |
39%|███▉ | 280/711 [27:21<37:26, 5.21s/it]
|
| 335 |
|
| 336 |
39%|███▉ | 280/711 [27:21<37:26, 5.21s/it]
|
| 337 |
40%|███▉ | 281/711 [27:26<38:16, 5.34s/it]
|
| 338 |
40%|███▉ | 282/711 [27:31<37:40, 5.27s/it]
|
| 339 |
40%|███▉ | 283/711 [27:37<37:37, 5.28s/it]
|
| 340 |
40%
|
|
|
|
|
|
|
| 341 |
40%|████ | 285/711 [27:47<37:34, 5.29s/it]
|
| 342 |
40%|████ | 286/711 [27:53<37:14, 5.26s/it]
|
| 343 |
40%|████ | 287/711 [27:58<37:31, 5.31s/it]
|
| 344 |
41%|████ | 288/711 [28:03<37:04, 5.26s/it]
|
| 345 |
41%|████ | 289/711 [28:08<36:40, 5.21s/it]
|
| 346 |
41%|████ | 290/711 [28:13<36:19, 5.18s/it]
|
| 347 |
|
| 348 |
41%|████ | 290/711 [28:13<36:19, 5.18s/it]
|
| 349 |
41%|████ | 291/711 [28:19<36:32, 5.22s/it]
|
| 350 |
41%|████ | 292/711 [28:24<36:57, 5.29s/it]
|
| 351 |
41%|████ | 293/711 [28:29<36:57, 5.31s/it]
|
| 352 |
41%|████▏ | 294/711 [28:35<36:36, 5.27s/it]
|
| 353 |
41%|████▏ | 295/711 [28:40<36:18, 5.24s/it]
|
| 354 |
42%|████▏ | 296/711 [28:45<35:59, 5.20s/it]
|
| 355 |
42%|████▏ | 297/711 [28:50<36:22, 5.27s/it]
|
| 356 |
42%|████▏ | 298/711 [28:55<36:02, 5.24s/it]
|
| 357 |
42%|████▏ | 299/711 [29:
|
|
|
|
|
|
|
|
|
|
| 358 |
42%|████▏ | 300/711 [29:06<36:01, 5.26s/it]
|
| 359 |
|
| 360 |
42%|████▏ | 300/711 [29:06<36:01, 5.26s/it]
|
| 361 |
42%|████▏ | 301/711 [29:11<35:42, 5.23s/it]
|
| 362 |
42%|████▏ | 302/711 [29:16<35:28, 5.20s/it]
|
| 363 |
43%|████▎ | 303/711 [29:21<35:11, 5.17s/it]
|
| 364 |
43%|████▎ | 304/711 [29:27<35:03, 5.17s/it]
|
| 365 |
43%|████▎ | 305/711 [29:32<35:16, 5.21s/it]
|
| 366 |
43%|████▎ | 306/711 [29:37<34:57, 5.18s/it]
|
| 367 |
43%|████▎ | 307/711 [29:42<34:42, 5.15s/it]
|
| 368 |
43%|████▎ | 308/711 [29:47<34:33, 5.15s/it]
|
| 369 |
43%|████▎ | 309/711 [29:53<35:23, 5.28s/it]
|
| 370 |
44%|████▎ | 310/711 [29:59<36:23, 5.45s/it]
|
| 371 |
|
| 372 |
44%|████▎ | 310/711 [29:59<36:23, 5.45s/it]
|
| 373 |
44%|████▎ | 311/711 [30:04<35:40, 5.35s/it]
|
| 374 |
44%|████▍ | 312/711 [30:09<35:47, 5.38s/it]
|
| 375 |
|
|
|
|
|
|
|
| 376 |
44%|████▍ | 314/711 [30:20<35:18, 5.34s/it]
|
| 377 |
44%|████▍ | 315/711 [30:25<35:09, 5.33s/it]
|
| 378 |
44%|████▍ | 316/711 [30:30<34:42, 5.27s/it]
|
| 379 |
45%|████▍ | 317/711 [30:35<34:17, 5.22s/it]
|
| 380 |
45%|████▍ | 318/711 [30:40<34:02, 5.20s/it]
|
| 381 |
45%|████▍ | 319/711 [30:46<33:48, 5.18s/it]
|
| 382 |
45%|████▌ | 320/711 [30:51<33:36, 5.16s/it]
|
| 383 |
|
| 384 |
45%|████▌ | 320/711 [30:51<33:36, 5.16s/it]
|
| 385 |
45%|████▌ | 321/711 [30:56<33:30, 5.16s/it]
|
| 386 |
45%|████▌ | 322/711 [31:01<33:19, 5.14s/it]
|
| 387 |
45%|████▌ | 323/711 [31:06<33:37, 5.20s/it]
|
| 388 |
46%|████▌ | 324/711 [31:11<33:25, 5.18s/it]
|
| 389 |
46%|████▌ | 325/711 [31:17<33:17, 5.17s/it]
|
| 390 |
46%|████▌ | 326/711 [31:22<33:09, 5.17s/it]
|
| 391 |
46%|████▌ | 327/711 [31:27<33:08, 5.18s/it]
|
| 392 |
46%|███
|
|
|
|
|
|
|
|
|
|
| 393 |
46%|████▋ | 329/711 [31:37<32:44, 5.14s/it]
|
| 394 |
46%|████▋ | 330/711 [31:42<32:58, 5.19s/it]
|
| 395 |
|
| 396 |
46%|████▋ | 330/711 [31:42<32:58, 5.19s/it]
|
| 397 |
47%|████▋ | 331/711 [31:48<32:46, 5.18s/it]
|
| 398 |
47%|████▋ | 332/711 [31:53<32:35, 5.16s/it]
|
| 399 |
47%|████▋ | 333/711 [31:58<33:05, 5.25s/it]
|
| 400 |
47%|████▋ | 334/711 [32:03<32:47, 5.22s/it]
|
| 401 |
47%|████▋ | 335/711 [32:09<33:10, 5.29s/it]
|
| 402 |
47%|████▋ | 336/711 [32:14<32:41, 5.23s/it]
|
| 403 |
47%|████▋ | 337/711 [32:19<32:21, 5.19s/it]
|
| 404 |
48%|████▊ | 338/711 [32:24<32:19, 5.20s/it]
|
| 405 |
48%|████▊ | 339/711 [32:29<32:05, 5.18s/it]
|
| 406 |
48%|████▊ | 340/711 [32:34<31:51, 5.15s/it]
|
| 407 |
|
| 408 |
48%|████▊ | 340/711 [32:34<31:51, 5.15s/it]
|
| 409 |
48%|████▊ | 341/711 [
|
|
|
|
|
|
|
| 410 |
48%|████▊ | 342/711 [32:45<31:45, 5.16s/it]
|
| 411 |
48%|████▊ | 343/711 [32:50<31:37, 5.16s/it]
|
| 412 |
48%|████▊ | 344/711 [32:55<31:44, 5.19s/it]
|
| 413 |
49%|████▊ | 345/711 [33:00<31:29, 5.16s/it]
|
| 414 |
49%|████▊ | 346/711 [33:05<31:18, 5.15s/it]
|
| 415 |
49%|████▉ | 347/711 [33:10<31:07, 5.13s/it]
|
| 416 |
49%|████▉ | 348/711 [33:16<30:59, 5.12s/it]
|
| 417 |
49%|████▉ | 349/711 [33:21<30:57, 5.13s/it]
|
| 418 |
49%|████▉ | 350/711 [33:26<30:52, 5.13s/it]
|
| 419 |
|
| 420 |
49%|████▉ | 350/711 [33:26<30:52, 5.13s/it]
|
| 421 |
49%|████▉ | 351/711 [33:31<30:45, 5.13s/it]
|
| 422 |
50%|████▉ | 352/711 [33:36<30:36, 5.12s/it]
|
| 423 |
50%|████▉ | 353/711 [33:41<30:32, 5.12s/it]
|
| 424 |
50%|████▉ | 354/711 [33:46<30:26, 5.12s/it]
|
| 425 |
50%|████▉ | 355/711 [33:51<30:20, 5.11s/it]
|
| 426 |
50%|█████ | 356/711 [33:57<30:13,
|
|
|
|
|
|
|
| 427 |
50%|█████ | 357/711 [34:02<30:10, 5.11s/it]
|
| 428 |
50%|█████ | 358/711 [34:07<30:07, 5.12s/it]
|
| 429 |
50%|█████ | 359/711 [34:12<30:03, 5.12s/it]
|
| 430 |
51%|█████ | 360/711 [34:17<29:57, 5.12s/it]
|
| 431 |
|
| 432 |
51%|█████ | 360/711 [34:17<29:57, 5.12s/it]
|
| 433 |
51%|█████ | 361/711 [34:22<29:53, 5.12s/it]
|
| 434 |
51%|█████ | 362/711 [34:27<29:46, 5.12s/it]
|
| 435 |
51%|█████ | 363/711 [34:32<29:41, 5.12s/it]
|
| 436 |
51%|█████ | 364/711 [34:38<29:39, 5.13s/it]
|
| 437 |
51%|█████▏ | 365/711 [34:43<29:59, 5.20s/it]
|
| 438 |
51%|█████▏ | 366/711 [34:48<29:44, 5.17s/it]
|
| 439 |
52%|█████▏ | 367/711 [34:53<29:30, 5.15s/it]
|
| 440 |
52%|█████▏ | 368/711 [34:58<29:27, 5.15s/it]
|
| 441 |
52%|█████▏ | 369/711 [35:03<29:14, 5.13s/it]
|
| 442 |
52%|█████▏ | 370/711 [35:09<29:11, 5.14s/it]
|
| 443 |
|
|
|
|
|
|
|
|
|
|
| 444 |
52%|█████▏ | 370/711 [35:09<29:11, 5.14s/it]
|
| 445 |
52%|█████▏ | 371/711 [35:14<29:01, 5.12s/it]
|
| 446 |
52%|█████▏ | 372/711 [35:19<28:54, 5.12s/it]
|
| 447 |
52%|█████▏ | 373/711 [35:24<29:01, 5.15s/it]
|
| 448 |
53%|█████▎ | 374/711 [35:29<28:53, 5.14s/it]
|
| 449 |
53%|█████▎ | 375/711 [35:34<28:43, 5.13s/it]
|
| 450 |
53%|█████▎ | 376/711 [35:39<28:33, 5.11s/it]
|
| 451 |
53%|█████▎ | 377/711 [35:44<28:25, 5.11s/it]
|
| 452 |
53%|█████▎ | 378/711 [35:49<28:21, 5.11s/it]
|
| 453 |
53%|█████▎ | 379/711 [35:55<28:52, 5.22s/it]
|
| 454 |
53%|█████▎ | 380/711 [36:00<28:33, 5.18s/it]
|
| 455 |
|
| 456 |
53%|█████▎ | 380/711 [36:00<28:33, 5.18s/it]
|
| 457 |
54%|█████▎ | 381/711 [36:05<28:53, 5.25s/it]
|
| 458 |
54%|█████▎ | 382/711 [36:11<28:32, 5.21s/it]
|
| 459 |
54%|█████▍ | 383/711 [36:16<28:38, 5.24s/it]
|
| 460 |
54%|█████▍ | 384/711 [36:2
|
|
|
|
|
|
|
| 461 |
54%|█████▍ | 385/711 [36:26<28:02, 5.16s/it]
|
| 462 |
54%|█████▍ | 386/711 [36:31<27:52, 5.15s/it]
|
| 463 |
54%|█████▍ | 387/711 [36:36<27:46, 5.14s/it]
|
| 464 |
55%|█████▍ | 388/711 [36:41<27:35, 5.12s/it]
|
| 465 |
55%|█████▍ | 389/711 [36:47<27:35, 5.14s/it]
|
| 466 |
55%|█████▍ | 390/711 [36:52<27:38, 5.17s/it]
|
| 467 |
|
| 468 |
55%|█████▍ | 390/711 [36:52<27:38, 5.17s/it]
|
| 469 |
55%|█████▍ | 391/711 [36:57<27:26, 5.14s/it]
|
| 470 |
55%|█████▌ | 392/711 [37:02<27:35, 5.19s/it]
|
| 471 |
55%|█████▌ | 393/711 [37:07<27:27, 5.18s/it]
|
| 472 |
55%|█████▌ | 394/711 [37:12<27:12, 5.15s/it]
|
| 473 |
56%|█████▌ | 395/711 [37:18<27:39, 5.25s/it]
|
| 474 |
56%|█████▌ | 396/711 [37:23<27:19, 5.20s/it]
|
| 475 |
56%|█████▌ | 397/711 [37:28<27:03, 5.17s/it]
|
| 476 |
56%|█████▌ | 398/711 [37:33<26:52, 5.15s/it]
|
| 477 |
56%|█████▌
|
|
|
|
|
|
|
|
|
|
| 478 |
56%|█████▋ | 400/711 [37:43<26:35, 5.13s/it]
|
| 479 |
|
| 480 |
56%|█████▋ | 400/711 [37:43<26:35, 5.13s/it]
|
| 481 |
56%|█████▋ | 401/711 [37:48<26:27, 5.12s/it]
|
| 482 |
57%|█████▋ | 402/711 [37:54<26:18, 5.11s/it]
|
| 483 |
57%|█████▋ | 403/711 [37:59<26:11, 5.10s/it]
|
| 484 |
57%|█████▋ | 404/711 [38:04<26:10, 5.11s/it]
|
| 485 |
57%|█████▋ | 405/711 [38:09<26:34, 5.21s/it]
|
| 486 |
57%|█████▋ | 406/711 [38:14<26:25, 5.20s/it]
|
| 487 |
57%|█████▋ | 407/711 [38:19<26:11, 5.17s/it]
|
| 488 |
57%|█████▋ | 408/711 [38:25<26:03, 5.16s/it]
|
| 489 |
58%|█████▊ | 409/711 [38:30<26:30, 5.27s/it]
|
| 490 |
58%|█████▊ | 410/711 [38:35<26:14, 5.23s/it]
|
| 491 |
|
| 492 |
58%|█████▊ | 410/711 [38:35<26:14, 5.23s/it]
|
| 493 |
58%|█████▊ | 411/711 [38:41<26:30, 5.30s/it]
|
| 494 |
58%|███�
|
|
|
|
|
|
|
| 495 |
58%|█████▊ | 413/711 [38:51<25:55, 5.22s/it]
|
| 496 |
58%|█████▊ | 414/711 [38:56<25:40, 5.19s/it]
|
| 497 |
58%|█████▊ | 415/711 [39:01<25:28, 5.16s/it]
|
| 498 |
59%|█████▊ | 416/711 [39:07<25:49, 5.25s/it]
|
| 499 |
59%|█████▊ | 417/711 [39:12<25:31, 5.21s/it]
|
| 500 |
59%|█████▉ | 418/711 [39:17<25:17, 5.18s/it]
|
| 501 |
59%|█████▉ | 419/711 [39:22<25:06, 5.16s/it]
|
| 502 |
59%|█████▉ | 420/711 [39:27<24:59, 5.15s/it]
|
| 503 |
|
| 504 |
59%|█████▉ | 420/711 [39:27<24:59, 5.15s/it]
|
| 505 |
59%|█████▉ | 421/711 [39:32<24:49, 5.14s/it]
|
| 506 |
59%|█████▉ | 422/711 [39:37<24:44, 5.14s/it]
|
| 507 |
59%|█████▉ | 423/711 [39:43<24:35, 5.12s/it]
|
| 508 |
60%|█████▉ | 424/711 [39:48<24:42, 5.17s/it]
|
| 509 |
60%|█████▉ | 425/711 [39:53<24:34, 5.16s/it]
|
| 510 |
60%|█████▉ | 426/711 [39:58<24:30, 5.16s/
|
|
|
|
|
|
|
| 511 |
60%|██████ | 427/711 [40:03<24:40, 5.21s/it]
|
| 512 |
60%|██████ | 428/711 [40:09<24:55, 5.28s/it]
|
| 513 |
60%|██████ | 429/711 [40:14<24:33, 5.23s/it]
|
| 514 |
60%|██████ | 430/711 [40:19<24:23, 5.21s/it]
|
| 515 |
|
| 516 |
60%|██████ | 430/711 [40:19<24:23, 5.21s/it]
|
| 517 |
61%|██████ | 431/711 [40:24<24:10, 5.18s/it]
|
| 518 |
61%|██████ | 432/711 [40:29<23:56, 5.15s/it]
|
| 519 |
61%|██████ | 433/711 [40:34<23:50, 5.14s/it]
|
| 520 |
61%|██████ | 434/711 [40:40<23:39, 5.12s/it]
|
| 521 |
61%|██████ | 435/711 [40:45<23:32, 5.12s/it]
|
| 522 |
61%|██████▏ | 436/711 [40:50<24:14, 5.29s/it]
|
| 523 |
61%|██████▏ | 437/711 [40:56<24:11, 5.30s/it]
|
| 524 |
62%|██████▏ | 438/711 [41:01<23:49, 5.24s/it]
|
| 525 |
62%|██████▏ | 439/711 [41:06<23:33, 5.20s/it]
|
| 526 |
62%|██████▏ | 440/711 [41:11<23:37, 5.23s/it]
|
| 527 |
|
|
|
|
|
|
|
|
|
|
| 528 |
62%|██████▏ | 440/711 [41:11<23:37, 5.23s/it]
|
| 529 |
62%|██████▏ | 441/711 [41:16<23:23, 5.20s/it]
|
| 530 |
62%|██████▏ | 442/711 [41:21<23:13, 5.18s/it]
|
| 531 |
62%|██████▏ | 443/711 [41:27<23:01, 5.16s/it]
|
| 532 |
62%|██████▏ | 444/711 [41:32<22:57, 5.16s/it]
|
| 533 |
63%|██████▎ | 445/711 [41:37<23:14, 5.24s/it]
|
| 534 |
63%|██████▎ | 446/711 [41:42<22:58, 5.20s/it]
|
| 535 |
63%|██████▎ | 447/711 [41:47<22:45, 5.17s/it]
|
| 536 |
63%|██████▎ | 448/711 [41:52<22:34, 5.15s/it]
|
| 537 |
63%|██████▎ | 449/711 [41:58<22:43, 5.20s/it]
|
| 538 |
63%|██████▎ | 450/711 [42:03<22:32, 5.18s/it]
|
| 539 |
|
| 540 |
63%|██████▎ | 450/711 [42:03<22:32, 5.18s/it]
|
| 541 |
63%|██████▎ | 451/711 [42:08<22:20, 5.16s/it]
|
| 542 |
64%|██████▎ | 452/711 [42:13<22:14, 5.15s/it]
|
| 543 |
64%|██████▎ | 453/711 [42:18<22:09, 5
|
|
|
|
|
|
|
| 544 |
64%|██████▍ | 454/711 [42:23<22:03, 5.15s/it]
|
| 545 |
64%|██████▍ | 455/711 [42:29<21:56, 5.14s/it]
|
| 546 |
64%|██████▍ | 456/711 [42:34<21:51, 5.14s/it]
|
| 547 |
64%|██████▍ | 457/711 [42:39<21:44, 5.14s/it]
|
| 548 |
64%|██████▍ | 458/711 [42:44<21:40, 5.14s/it]
|
| 549 |
65%|██████▍ | 459/711 [42:49<21:39, 5.16s/it]
|
| 550 |
65%|██████▍ | 460/711 [42:54<21:46, 5.21s/it]
|
| 551 |
|
| 552 |
65%|██████▍ | 460/711 [42:54<21:46, 5.21s/it]
|
| 553 |
65%|██████▍ | 461/711 [43:00<21:38, 5.19s/it]
|
| 554 |
65%|██████▍ | 462/711 [43:05<21:30, 5.18s/it]
|
| 555 |
65%|██████▌ | 463/711 [43:10<21:25, 5.18s/it]
|
| 556 |
65%|██████▌ | 464/711 [43:15<21:17, 5.17s/it]
|
| 557 |
65%|██████▌ | 465/711 [43:20<21:05, 5.15s/it]
|
| 558 |
66%|██████▌ | 466/711 [43:25<20:58, 5.14s/it]
|
| 559 |
66%|██████▌ | 467/711 [43:30<20:53, 5.14s/it]
|
| 560 |
66%
|
|
|
|
|
|
|
|
|
|
| 561 |
66%|██████▌ | 469/711 [43:41<21:10, 5.25s/it]
|
| 562 |
66%|██████▌ | 470/711 [43:46<20:53, 5.20s/it]
|
| 563 |
|
| 564 |
66%|██████▌ | 470/711 [43:46<20:53, 5.20s/it]
|
| 565 |
66%|██████▌ | 471/711 [43:51<20:45, 5.19s/it]
|
| 566 |
66%|██████▋ | 472/711 [43:57<20:40, 5.19s/it]
|
| 567 |
67%|██████▋ | 473/711 [44:02<20:28, 5.16s/it]
|
| 568 |
67%|██████▋ | 474/711 [44:07<20:23, 5.16s/it]
|
| 569 |
67%|██████▋ | 475/711 [44:12<20:12, 5.14s/it]
|
| 570 |
67%|██████▋ | 476/711 [44:18<20:43, 5.29s/it]
|
| 571 |
67%|██████▋ | 477/711 [44:23<20:23, 5.23s/it]
|
| 572 |
67%|██████▋ | 478/711 [44:28<20:24, 5.26s/it]
|
| 573 |
67%|██████▋ | 479/711 [44:33<20:10, 5.22s/it]
|
| 574 |
68%|██████▊ | 480/711 [44:38<19:59, 5.19s/it]
|
| 575 |
|
| 576 |
68%|██████▊ |
|
|
|
|
|
|
|
| 577 |
68%|██████▊ | 481/711 [44:44<20:13, 5.28s/it]
|
| 578 |
68%|██████▊ | 482/711 [44:49<20:00, 5.24s/it]
|
| 579 |
68%|██████▊ | 483/711 [44:54<19:48, 5.21s/it]
|
| 580 |
68%|██████▊ | 484/711 [44:59<19:39, 5.20s/it]
|
| 581 |
68%|██████▊ | 485/711 [45:04<19:33, 5.19s/it]
|
| 582 |
68%|██████▊ | 486/711 [45:10<19:26, 5.18s/it]
|
| 583 |
68%|██████▊ | 487/711 [45:15<19:19, 5.18s/it]
|
| 584 |
69%|██████▊ | 488/711 [45:20<19:09, 5.15s/it]
|
| 585 |
69%|██████▉ | 489/711 [45:25<19:24, 5.25s/it]
|
| 586 |
69%|██████▉ | 490/711 [45:30<19:09, 5.20s/it]
|
| 587 |
|
| 588 |
69%|██████▉ | 490/711 [45:30<19:09, 5.20s/it]
|
| 589 |
69%|██████▉ | 491/711 [45:36<19:16, 5.26s/it]
|
| 590 |
69%|██████▉ | 492/711 [45:41<19:00, 5.21s/it]
|
| 591 |
69%|██████▉ | 493/711 [45:46<18:47, 5.17s/it]
|
| 592 |
69%|██████▉ | 494/711 [45:
|
|
|
|
|
|
|
| 593 |
70%|██████▉ | 495/711 [45:56<18:45, 5.21s/it]
|
| 594 |
70%|██████▉ | 496/711 [46:02<18:34, 5.19s/it]
|
| 595 |
70%|██████▉ | 497/711 [46:07<18:38, 5.22s/it]
|
| 596 |
70%|███████ | 498/711 [46:12<18:25, 5.19s/it]
|
| 597 |
70%|███████ | 499/711 [46:18<18:55, 5.35s/it]
|
| 598 |
70%|███████ | 500/711 [46:23<18:34, 5.28s/it]
|
| 599 |
|
| 600 |
70%|███████ | 500/711 [46:23<18:34, 5.28s/it]
|
| 601 |
70%|███████ | 501/711 [46:28<18:22, 5.25s/it]
|
| 602 |
71%|███████ | 502/711 [46:33<18:12, 5.23s/it]
|
| 603 |
71%|███████ | 503/711 [46:38<18:04, 5.21s/it]
|
| 604 |
71%|███████ | 504/711 [46:44<17:53, 5.19s/it]
|
| 605 |
71%|███████ | 505/711 [46:49<17:43, 5.16s/it]
|
| 606 |
71%|███████ | 506/711 [46:54<17:36, 5.16s/it]
|
| 607 |
71%|███████▏ | 507/711 [46:59<17:29, 5.14s/it]
|
| 608 |
71%|███████▏ | 508/711 [47:04<17:21,
|
|
|
|
|
|
|
|
|
|
| 609 |
72%|███████▏ | 509/711 [47:09<17:16, 5.13s/it]
|
| 610 |
72%|███████▏ | 510/711 [47:14<17:09, 5.12s/it]
|
| 611 |
|
| 612 |
72%|███████▏ | 510/711 [47:14<17:09, 5.12s/it]
|
| 613 |
72%|███████▏ | 511/711 [47:20<17:21, 5.21s/it]
|
| 614 |
72%|███████▏ | 512/711 [47:25<17:12, 5.19s/it]
|
| 615 |
72%|███████▏ | 513/711 [47:30<17:00, 5.16s/it]
|
| 616 |
72%|███████▏ | 514/711 [47:35<16:51, 5.14s/it]
|
| 617 |
72%|███████▏ | 515/711 [47:40<16:45, 5.13s/it]
|
| 618 |
73%|███████▎ | 516/711 [47:45<16:40, 5.13s/it]
|
| 619 |
73%|███████▎ | 517/711 [47:50<16:32, 5.12s/it]
|
| 620 |
73%|███████▎ | 518/711 [47:55<16:30, 5.13s/it]
|
| 621 |
73%|███████▎ | 519/711 [48:01<16:25, 5.13s/it]
|
| 622 |
73%|███████▎ | 520/711 [48:06<16:42, 5.25s/it]
|
| 623 |
|
| 624 |
73%|███████▎ | 520/711 [48:06<16:4
|
|
|
|
|
|
|
| 625 |
73%|███████▎ | 521/711 [48:11<16:30, 5.21s/it]
|
| 626 |
73%|███████▎ | 522/711 [48:16<16:18, 5.18s/it]
|
| 627 |
74%|███████▎ | 523/711 [48:22<16:20, 5.22s/it]
|
| 628 |
74%|███████▎ | 524/711 [48:27<16:36, 5.33s/it]
|
| 629 |
74%|███████▍ | 525/711 [48:32<16:20, 5.27s/it]
|
| 630 |
74%|███████▍ | 526/711 [48:37<16:07, 5.23s/it]
|
| 631 |
74%|███████▍ | 527/711 [48:43<15:53, 5.18s/it]
|
| 632 |
74%|███████▍ | 528/711 [48:48<15:54, 5.22s/it]
|
| 633 |
74%|███████▍ | 529/711 [48:53<15:42, 5.18s/it]
|
| 634 |
75%|███████▍ | 530/711 [48:58<15:38, 5.18s/it]
|
| 635 |
|
| 636 |
75%|███████▍ | 530/711 [48:58<15:38, 5.18s/it]
|
| 637 |
75%|███████▍ | 531/711 [49:03<15:27, 5.15s/it]
|
| 638 |
75%|███████▍ | 532/711 [49:08<15:19, 5.14s/it]
|
| 639 |
75%|███████▍ | 533/711 [49:13<15:11, 5.12s/it]
|
| 640 |
75%|███████▌ | 53
|
|
|
|
|
|
|
| 641 |
75%|███████▌ | 535/711 [49:24<14:58, 5.11s/it]
|
| 642 |
75%|███████▌ | 536/711 [49:29<15:04, 5.17s/it]
|
| 643 |
76%|███████▌ | 537/711 [49:34<14:59, 5.17s/it]
|
| 644 |
76%|███████▌ | 538/711 [49:39<14:48, 5.14s/it]
|
| 645 |
76%|███████▌ | 539/711 [49:44<14:46, 5.16s/it]
|
| 646 |
76%|███████▌ | 540/711 [49:49<14:41, 5.15s/it]
|
| 647 |
|
| 648 |
76%|███████▌ | 540/711 [49:49<14:41, 5.15s/it]
|
| 649 |
76%|███████▌ | 541/711 [49:55<14:33, 5.14s/it]
|
| 650 |
76%|███████▌ | 542/711 [50:00<14:26, 5.13s/it]
|
| 651 |
76%|███████▋ | 543/711 [50:05<14:23, 5.14s/it]
|
| 652 |
77%|███████▋ | 544/711 [50:10<14:17, 5.13s/it]
|
| 653 |
77%|███████▋ | 545/711 [50:15<14:10, 5.12s/it]
|
| 654 |
77%|███████▋ | 546/711 [50:20<14:04, 5.12s/it]
|
| 655 |
77%|███████▋ | 547/711 [50:25<13:59, 5.12s/it]
|
| 656 |
77%|████�
|
|
|
|
|
|
|
|
|
|
| 657 |
77%|███████▋ | 549/711 [50:35<13:47, 5.11s/it]
|
| 658 |
77%|███████▋ | 550/711 [50:41<13:42, 5.11s/it]
|
| 659 |
|
| 660 |
77%|███████▋ | 550/711 [50:41<13:42, 5.11s/it]
|
| 661 |
77%|███████▋ | 551/711 [50:46<13:40, 5.13s/it]
|
| 662 |
78%|███████▊ | 552/711 [50:51<13:36, 5.13s/it]
|
| 663 |
78%|███████▊ | 553/711 [50:56<13:29, 5.12s/it]
|
| 664 |
78%|███████▊ | 554/711 [51:01<13:24, 5.12s/it]
|
| 665 |
78%|███████▊ | 555/711 [51:06<13:21, 5.14s/it]
|
| 666 |
78%|███████▊ | 556/711 [51:11<13:14, 5.12s/it]
|
| 667 |
78%|███████▊ | 557/711 [51:17<13:25, 5.23s/it]
|
| 668 |
78%|███████▊ | 558/711 [51:22<13:32, 5.31s/it]
|
| 669 |
79%|███████▊ | 559/711 [51:27<13:18, 5.25s/it]
|
| 670 |
79%|███████▉ | 560/711 [51:33<13:06, 5.21s/it]
|
| 671 |
|
| 672 |
79%|███�
|
|
|
|
|
|
|
| 673 |
79%|███████▉ | 561/711 [51:38<12:58, 5.19s/it]
|
| 674 |
79%|███████▉ | 562/711 [51:43<13:13, 5.33s/it]
|
| 675 |
79%|███████▉ | 563/711 [51:49<13:11, 5.35s/it]
|
| 676 |
79%|███████▉ | 564/711 [51:54<12:58, 5.29s/it]
|
| 677 |
79%|███████▉ | 565/711 [51:59<12:47, 5.26s/it]
|
| 678 |
80%|███████▉ | 566/711 [52:04<12:34, 5.21s/it]
|
| 679 |
80%|███████▉ | 567/711 [52:10<12:40, 5.28s/it]
|
| 680 |
80%|███████▉ | 568/711 [52:15<12:28, 5.23s/it]
|
| 681 |
80%|████████ | 569/711 [52:20<12:27, 5.26s/it]
|
| 682 |
80%|████████ | 570/711 [52:25<12:14, 5.21s/it]
|
| 683 |
|
| 684 |
80%|████████ | 570/711 [52:25<12:14, 5.21s/it]
|
| 685 |
80%|████████ | 571/711 [52:31<12:14, 5.24s/it]
|
| 686 |
80%|████████ | 572/711 [52:36<12:04, 5.21s/it]
|
| 687 |
81%|████████ | 573/711 [52:41<12:02, 5.24s/it]
|
|
|
|
|
|
|
| 688 |
81%|████████ | 574/711 [52:46<11:53, 5.21s/it]
|
| 689 |
81%|████████ | 575/711 [52:51<11:44, 5.18s/it]
|
| 690 |
81%|████████ | 576/711 [52:56<11:38, 5.17s/it]
|
| 691 |
81%|████████ | 577/711 [53:02<11:46, 5.27s/it]
|
| 692 |
81%|████████▏ | 578/711 [53:07<11:49, 5.33s/it]
|
| 693 |
81%|████████▏ | 579/711 [53:12<11:34, 5.26s/it]
|
| 694 |
82%|████████▏ | 580/711 [53:18<11:25, 5.23s/it]
|
| 695 |
|
| 696 |
82%|████████▏ | 580/711 [53:18<11:25, 5.23s/it]
|
| 697 |
82%|████████▏ | 581/711 [53:23<11:15, 5.19s/it]
|
| 698 |
82%|████████▏ | 582/711 [53:28<11:06, 5.16s/it]
|
| 699 |
82%|████████▏ | 583/711 [53:33<10:59, 5.15s/it]
|
| 700 |
82%|████████▏ | 584/711 [53:38<10:52, 5.14s/it]
|
| 701 |
82%|████████▏ | 585/711 [53:43<10:46, 5.13s/it]
|
| 702 |
82%|████████▏ | 586/711 [53:48<10:42, 5.14s/it]
|
| 703 |
83%|███████�
|
|
|
|
|
|
|
| 704 |
83%|████████▎ | 588/711 [53:59<10:55, 5.33s/it]
|
| 705 |
83%|████████▎ | 589/711 [54:05<10:40, 5.25s/it]
|
| 706 |
83%|████████▎ | 590/711 [54:10<10:43, 5.32s/it]
|
| 707 |
|
| 708 |
83%|████████▎ | 590/711 [54:10<10:43, 5.32s/it]
|
| 709 |
83%|████████▎ | 591/711 [54:15<10:32, 5.27s/it]
|
| 710 |
83%|████████▎ | 592/711 [54:20<10:23, 5.24s/it]
|
| 711 |
83%|████████▎ | 593/711 [54:25<10:14, 5.21s/it]
|
| 712 |
84%|████████▎ | 594/711 [54:31<10:07, 5.19s/it]
|
| 713 |
84%|████████▎ | 595/711 [54:36<09:59, 5.17s/it]
|
| 714 |
84%|████████▍ | 596/711 [54:41<09:51, 5.14s/it]
|
| 715 |
84%|████████▍ | 597/711 [54:46<09:53, 5.21s/it]
|
| 716 |
84%|████████▍ | 598/711 [54:51<09:45, 5.18s/it]
|
| 717 |
84%|████████▍ | 599/711 [54:57<10:07, 5.42s/it]
|
| 718 |
84%|████████▍ | 600/711 [55:02
|
|
|
|
|
|
|
|
|
|
| 719 |
|
| 720 |
84%|████████▍ | 600/711 [55:02<09:53, 5.35s/it]
|
| 721 |
85%|████████▍ | 601/711 [55:08<09:48, 5.35s/it]
|
| 722 |
85%|████████▍ | 602/711 [55:13<09:35, 5.28s/it]
|
| 723 |
85%|████████▍ | 603/711 [55:18<09:24, 5.23s/it]
|
| 724 |
85%|████████▍ | 604/711 [55:23<09:22, 5.26s/it]
|
| 725 |
85%|████████▌ | 605/711 [55:28<09:12, 5.22s/it]
|
| 726 |
85%|████████▌ | 606/711 [55:34<09:27, 5.41s/it]
|
| 727 |
85%|████████▌ | 607/711 [55:40<09:29, 5.47s/it]
|
| 728 |
86%|████████▌ | 608/711 [55:45<09:17, 5.41s/it]
|
| 729 |
86%|████████▌ | 609/711 [55:50<09:03, 5.32s/it]
|
| 730 |
86%|████████▌ | 610/711 [55:56<08:56, 5.31s/it]
|
| 731 |
|
| 732 |
86%|████████▌ | 610/711 [55:56<08:56, 5.31s/it]
|
| 733 |
86%|████████▌ | 611/711 [56:01<08:45, 5.25s/it]
|
| 734 |
86%|████�
|
|
|
|
|
|
|
| 735 |
86%|████████▌ | 613/711 [56:11<08:34, 5.24s/it]
|
| 736 |
86%|████████▋ | 614/711 [56:16<08:25, 5.22s/it]
|
| 737 |
86%|████████▋ | 615/711 [56:21<08:17, 5.19s/it]
|
| 738 |
87%|████████▋ | 616/711 [56:28<08:41, 5.48s/it]
|
| 739 |
87%|████████▋ | 617/711 [56:33<08:24, 5.37s/it]
|
| 740 |
87%|████████▋ | 618/711 [56:38<08:23, 5.41s/it]
|
| 741 |
87%|████████▋ | 619/711 [56:43<08:09, 5.33s/it]
|
| 742 |
87%|████████▋ | 620/711 [56:49<07:59, 5.27s/it]
|
| 743 |
|
| 744 |
87%|████████▋ | 620/711 [56:49<07:59, 5.27s/it]
|
| 745 |
87%|████████▋ | 621/711 [56:54<07:51, 5.24s/it]
|
| 746 |
87%|████████▋ | 622/711 [56:59<07:43, 5.21s/it]
|
| 747 |
88%|████████▊ | 623/711 [57:04<07:44, 5.28s/it]
|
| 748 |
88%|████████▊ | 624/711 [57:09<07:35, 5.23s/it]
|
| 749 |
88%|████████▊ | 625/7
|
|
|
|
|
|
|
| 750 |
88%|████████▊ | 626/711 [57:20<07:19, 5.17s/it]
|
| 751 |
88%|████████▊ | 627/711 [57:25<07:12, 5.14s/it]
|
| 752 |
88%|████████▊ | 628/711 [57:30<07:07, 5.15s/it]
|
| 753 |
88%|████████▊ | 629/711 [57:35<07:03, 5.16s/it]
|
| 754 |
89%|████████▊ | 630/711 [57:41<07:25, 5.49s/it]
|
| 755 |
|
| 756 |
89%|████████▊ | 630/711 [57:41<07:25, 5.49s/it]
|
| 757 |
89%|████████▊ | 631/711 [57:47<07:13, 5.42s/it]
|
| 758 |
89%|████████▉ | 632/711 [57:52<07:01, 5.33s/it]
|
| 759 |
89%|████████▉ | 633/711 [57:57<06:51, 5.28s/it]
|
| 760 |
89%|████████▉ | 634/711 [58:02<06:42, 5.23s/it]
|
| 761 |
89%|████████▉ | 635/711 [58:07<06:35, 5.21s/it]
|
| 762 |
89%|████████▉ | 636/711 [58:13<06:36, 5.28s/it]
|
| 763 |
90%|████████▉ | 637/711 [58:18<06:26, 5.23s/it]
|
| 764 |
90%|████████▉ | 638/711 [58:23<06:19, 5.2
|
|
|
|
|
|
|
|
|
|
| 765 |
90%|████████▉ | 639/711 [58:28<06:19, 5.28s/it]
|
| 766 |
90%|█████████ | 640/711 [58:33<06:11, 5.24s/it]
|
| 767 |
|
| 768 |
90%|█████████ | 640/711 [58:33<06:11, 5.24s/it]
|
| 769 |
90%|█████████ | 641/711 [58:39<06:10, 5.30s/it]
|
| 770 |
90%|█████████ | 642/711 [58:44<06:02, 5.25s/it]
|
| 771 |
90%|█████████ | 643/711 [58:49<05:53, 5.20s/it]
|
| 772 |
91%|█████████ | 644/711 [58:54<05:46, 5.17s/it]
|
| 773 |
91%|█████████ | 645/711 [59:00<05:46, 5.26s/it]
|
| 774 |
91%|█████████ | 646/711 [59:05<05:38, 5.21s/it]
|
| 775 |
91%|█████████ | 647/711 [59:10<05:31, 5.18s/it]
|
| 776 |
91%|█████████ | 648/711 [59:15<05:25, 5.17s/it]
|
| 777 |
91%|█████████▏| 649/711 [59:20<05:18, 5.14s/it]
|
| 778 |
91%|█████████▏| 650/711 [59:25<05:18, 5.23s/it]
|
| 779 |
|
| 780 |
91%|███████�
|
|
|
|
|
|
|
| 781 |
92%|█████████▏| 651/711 [59:31<05:11, 5.20s/it]
|
| 782 |
92%|█████████▏| 652/711 [59:36<05:04, 5.16s/it]
|
| 783 |
92%|█████████▏| 653/711 [59:41<05:07, 5.31s/it]
|
| 784 |
92%|█████████▏| 654/711 [59:46<04:58, 5.24s/it]
|
| 785 |
92%|█████████▏| 655/711 [59:52<04:52, 5.22s/it]
|
| 786 |
92%|█████████▏| 656/711 [59:57<04:51, 5.30s/it]
|
| 787 |
92%|█████████▏| 657/711 [1:00:02<04:43, 5.26s/it]
|
| 788 |
93%|█████████▎| 658/711 [1:00:07<04:36, 5.22s/it]
|
| 789 |
93%|█████████▎| 659/711 [1:00:13<04:32, 5.23s/it]
|
| 790 |
93%|█████████▎| 660/711 [1:00:18<04:24, 5.19s/it]
|
| 791 |
|
| 792 |
93%|█████████▎| 660/711 [1:00:18<04:24, 5.19s/it]
|
| 793 |
93%|█████████▎| 661/711 [1:00:23<04:23, 5.28s/it]
|
| 794 |
93%|█████████▎| 662/711 [1:00:28<04:16, 5.23s/it]
|
| 795 |
93%
|
|
|
|
|
|
|
| 796 |
93%|█████████▎| 664/711 [1:00:39<04:02, 5.17s/it]
|
| 797 |
94%|█████████▎| 665/711 [1:00:44<03:57, 5.16s/it]
|
| 798 |
94%|█████████▎| 666/711 [1:00:49<03:54, 5.21s/it]
|
| 799 |
94%|█████████▍| 667/711 [1:00:54<03:48, 5.19s/it]
|
| 800 |
94%|█████████▍| 668/711 [1:00:59<03:41, 5.16s/it]
|
| 801 |
94%|█████████▍| 669/711 [1:01:04<03:35, 5.14s/it]
|
| 802 |
94%|█████████▍| 670/711 [1:01:10<03:33, 5.22s/it]
|
| 803 |
|
| 804 |
94%|█████████▍| 670/711 [1:01:10<03:33, 5.22s/it]
|
| 805 |
94%|█████████▍| 671/711 [1:01:15<03:27, 5.18s/it]
|
| 806 |
95%|█████████▍| 672/711 [1:01:20<03:23, 5.22s/it]
|
| 807 |
95%|█████████▍| 673/711 [1:01:25<03:17, 5.20s/it]
|
| 808 |
95%|█████████▍| 674/711 [1:01:30<03:10, 5.16s/it]
|
| 809 |
95%|█████████▍| 67
|
|
|
|
|
|
|
| 810 |
95%|█████████▌| 676/711 [1:01:42<03:13, 5.53s/it]
|
| 811 |
95%|█████████▌| 677/711 [1:01:48<03:11, 5.63s/it]
|
| 812 |
95%|█████████▌| 678/711 [1:01:53<03:02, 5.52s/it]
|
| 813 |
95%|█████████▌| 679/711 [1:01:58<02:52, 5.38s/it]
|
| 814 |
96%|█████████▌| 680/711 [1:02:03<02:44, 5.29s/it]
|
| 815 |
|
| 816 |
96%|█████████▌| 680/711 [1:02:03<02:44, 5.29s/it]
|
| 817 |
96%|█████████▌| 681/711 [1:02:08<02:37, 5.25s/it]
|
| 818 |
96%|█████████▌| 682/711 [1:02:14<02:33, 5.31s/it]
|
| 819 |
96%|█████████▌| 683/711 [1:02:19<02:27, 5.25s/it]
|
| 820 |
96%|█████████▌| 684/711 [1:02:24<02:20, 5.21s/it]
|
| 821 |
96%|█████████▋| 685/711 [1:02:29<02:14, 5.18s/it]
|
| 822 |
96%|█████████▋| 686/711 [1:02:34<02:09, 5.17s/it]
|
| 823 |
97%|█████████▋| 687/711 [1:02:39<02:03, 5.17s/it]
|
| 824 |
9
|
|
|
|
|
|
|
| 825 |
97%|█████████▋| 689/711 [1:02:50<01:53, 5.15s/it]
|
| 826 |
97%|█████████▋| 690/711 [1:02:55<01:47, 5.14s/it]
|
| 827 |
|
| 828 |
97%|█████████▋| 690/711 [1:02:55<01:47, 5.14s/it]
|
| 829 |
97%|█████████▋| 691/711 [1:03:00<01:43, 5.19s/it]
|
| 830 |
97%|█████████▋| 692/711 [1:03:05<01:37, 5.16s/it]
|
| 831 |
97%|█████████▋| 693/711 [1:03:10<01:32, 5.14s/it]
|
| 832 |
98%|█████████▊| 694/711 [1:03:15<01:27, 5.13s/it]
|
| 833 |
98%|█████████▊| 695/711 [1:03:21<01:22, 5.13s/it]
|
| 834 |
98%|█████████▊| 696/711 [1:03:26<01:17, 5.14s/it]
|
| 835 |
98%|█████████▊| 697/711 [1:03:31<01:11, 5.14s/it]
|
| 836 |
98%|█████████▊| 698/711 [1:03:36<01:06, 5.14s/it]
|
| 837 |
98%|█████████▊| 699/711 [1:03:41<01:02, 5.17s/it]
|
| 838 |
98%|█████████▊|
|
|
|
|
|
|
|
|
|
|
| 839 |
|
| 840 |
98%|█████████▊| 700/711 [1:03:47<00:57, 5.24s/it]
|
| 841 |
99%|█████████▊| 701/711 [1:03:52<00:52, 5.26s/it]
|
| 842 |
99%|█████████▊| 702/711 [1:03:57<00:47, 5.23s/it]
|
| 843 |
99%|█████████▉| 703/711 [1:04:02<00:41, 5.20s/it]
|
| 844 |
99%|█████████▉| 704/711 [1:04:07<00:36, 5.20s/it]
|
| 845 |
99%|█████████▉| 705/711 [1:04:13<00:31, 5.30s/it]
|
| 846 |
99%|█████████▉| 706/711 [1:04:18<00:26, 5.24s/it]
|
| 847 |
99%|█████████▉| 707/711 [1:04:23<00:21, 5.27s/it]
|
| 848 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 849 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1: W1124 00:08:17.923000 737761 torch/distributed/run.py:792]
|
| 2 |
+
1: W1124 00:08:17.923000 737761 torch/distributed/run.py:792] *****************************************
|
| 3 |
+
1: W1124 00:08:17.923000 737761 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
1: W1124 00:08:17.923000 737761 torch/distributed/run.py:792] *****************************************
|
| 5 |
+
0: W1124 00:08:17.924000 3081902 torch/distributed/run.py:792]
|
| 6 |
+
0: W1124 00:08:17.924000 3081902 torch/distributed/run.py:792] *****************************************
|
| 7 |
+
0: W1124 00:08:17.924000 3081902 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 8 |
+
0: W1124 00:08:17.924000 3081902 torch/distributed/run.py:792] *****************************************
|
| 9 |
+
2: W1124 00:08:17.928000 1779991 torch/distributed/run.py:792]
|
| 10 |
+
2: W1124 00:08:17.928000 1779991 torch/distributed/run.py:792] *****************************************
|
| 11 |
+
2: W1124 00:08:17.928000 1779991 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 12 |
+
2: W1124 00:08:17.928000 1779991 torch/distributed/run.py:792] *****************************************
|
| 13 |
+
3: W1124 00:08:17.934000 3626745 torch/distributed/run.py:792]
|
| 14 |
+
3: W1124 00:08:17.934000 3626745 torch/distributed/run.py:792] *****************************************
|
| 15 |
+
3: W1124 00:08:17.934000 3626745 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 16 |
+
3: W1124 00:08:17.934000 3626745 torch/distributed/run.py:792] *****************************************
|
| 17 |
+
2: [2025-11-24 00:08:36,323] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:1780066] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 18 |
+
0: [2025-11-24 00:08:36,323] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:3081979] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 19 |
+
2: [2025-11-24 00:08:36,323] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:1780066] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 20 |
+
0: [2025-11-24 00:08:36,323] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:3081979] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 21 |
+
3: [2025-11-24 00:08:36,434] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:3626820] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 22 |
+
3: [2025-11-24 00:08:36,434] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:3626820] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 23 |
+
1: [2025-11-24 00:08:36,535] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:737836] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 24 |
+
1: [2025-11-24 00:08:36,535] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:737836] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 25 |
+
0: [33m[2025-11-24 00:08:40,005] [WARNING] [axolotl.utils.config.normalize_config:139] [PID:3081979] [RANK:0] Invalid value for save_steps (1.6666666666666667) from saves_per_epoch and/or num_epochs. Saving at training end only.[39m
|
| 26 |
+
0: [2025-11-24 00:08:40,025] [INFO] [axolotl.cli.config.load_cfg:245] [PID:3081979] [RANK:0] config:
|
| 27 |
+
0: {
|
| 28 |
+
0: "activation_offloading": false,
|
| 29 |
+
0: "auto_resume_from_checkpoints": true,
|
| 30 |
+
0: "axolotl_config_path": "/lustre/fswork/projects/rech/dgo/udv55np/train/tmp/1763939290349239182.yaml",
|
| 31 |
+
0: "base_model": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-12b",
|
| 32 |
+
0: "base_model_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-12b",
|
| 33 |
+
0: "batch_size": 16,
|
| 34 |
+
0: "bf16": true,
|
| 35 |
+
0: "capabilities": {
|
| 36 |
+
0: "bf16": true,
|
| 37 |
+
0: "compute_capability": "sm_90",
|
| 38 |
+
0: "fp8": false,
|
| 39 |
+
0: "n_gpu": 16,
|
| 40 |
+
0: "n_node": 1
|
| 41 |
+
0: },
|
| 42 |
+
0: "chat_template": "gemma3",
|
| 43 |
+
0: "context_parallel_size": 1,
|
| 44 |
+
0: "dataloader_num_workers": 16,
|
| 45 |
+
0: "dataloader_pin_memory": true,
|
| 46 |
+
0: "dataloader_prefetch_factor": 256,
|
| 47 |
+
0: "dataset_prepared_path": "/lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0",
|
| 48 |
+
0: "dataset_processes": 192,
|
| 49 |
+
0: "datasets": [
|
| 50 |
+
0: {
|
| 51 |
+
0: "chat_template": "tokenizer_default",
|
| 52 |
+
0: "data_files": [
|
| 53 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0007.jsonl",
|
| 54 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0009.jsonl",
|
| 55 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0005.jsonl",
|
| 56 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0006.jsonl",
|
| 57 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0014.jsonl",
|
| 58 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0010.jsonl",
|
| 59 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0012.jsonl",
|
| 60 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0008.jsonl",
|
| 61 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0001.jsonl",
|
| 62 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0002.jsonl",
|
| 63 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0013.jsonl",
|
| 64 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0015.jsonl",
|
| 65 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0004.jsonl",
|
| 66 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0011.jsonl",
|
| 67 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0000.jsonl",
|
| 68 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0003.jsonl"
|
| 69 |
+
0: ],
|
| 70 |
+
0: "ds_type": "json",
|
| 71 |
+
0: "field_messages": "conversations",
|
| 72 |
+
0: "message_property_mappings": {
|
| 73 |
+
0: "content": "content",
|
| 74 |
+
0: "role": "role"
|
| 75 |
+
0: },
|
| 76 |
+
0: "path": "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking",
|
| 77 |
+
0: "trust_remote_code": false,
|
| 78 |
+
0: "type": "chat_template"
|
| 79 |
+
0: }
|
| 80 |
+
0: ],
|
| 81 |
+
0: "ddp": true,
|
| 82 |
+
0: "deepspeed": {
|
| 83 |
+
0: "bf16": {
|
| 84 |
+
0: "enabled": true
|
| 85 |
+
0: },
|
| 86 |
+
0: "gradient_accumulation_steps": "auto",
|
| 87 |
+
0: "gradient_clipping": "auto",
|
| 88 |
+
0: "train_batch_size": "auto",
|
| 89 |
+
0: "train_micro_batch_size_per_gpu": "auto",
|
| 90 |
+
0: "wall_clock_breakdown": false,
|
| 91 |
+
0: "zero_optimization": {
|
| 92 |
+
0: "contiguous_gradients": true,
|
| 93 |
+
0: "overlap_comm": true,
|
| 94 |
+
0: "reduce_bucket_size": "auto",
|
| 95 |
+
0: "stage": 3,
|
| 96 |
+
0: "stage3_gather_16bit_weights_on_model_save": true,
|
| 97 |
+
0: "stage3_param_persistence_threshold": "auto",
|
| 98 |
+
0: "stage3_prefetch_bucket_size": "auto",
|
| 99 |
+
0: "sub_group_size": 0
|
| 100 |
+
0: }
|
| 101 |
+
0: },
|
| 102 |
+
0: "device": "cuda:0",
|
| 103 |
+
0: "device_map": {
|
| 104 |
+
0: "": 0
|
| 105 |
+
0: },
|
| 106 |
+
0: "dion_rank_fraction": 1.0,
|
| 107 |
+
0: "dion_rank_multiple_of": 1,
|
| 108 |
+
0: "env_capabilities": {
|
| 109 |
+
0: "torch_version": "2.6.0"
|
| 110 |
+
0: },
|
| 111 |
+
0: "eot_tokens": [
|
| 112 |
+
0: "<end_of_turn>"
|
| 113 |
+
0: ],
|
| 114 |
+
0: "eval_batch_size": 1,
|
| 115 |
+
0: "eval_causal_lm_metrics": [
|
| 116 |
+
0: "sacrebleu",
|
| 117 |
+
0: "comet",
|
| 118 |
+
0: "ter",
|
| 119 |
+
0: "chrf"
|
| 120 |
+
0: ],
|
| 121 |
+
0: "eval_max_new_tokens": 128,
|
| 122 |
+
0: "eval_sample_packing": true,
|
| 123 |
+
0: "eval_table_size": 0,
|
| 124 |
+
0: "evals_per_epoch": 0,
|
| 125 |
+
0: "flash_attention": true,
|
| 126 |
+
0: "fp16": false,
|
| 127 |
+
0: "gradient_accumulation_steps": 1,
|
| 128 |
+
0: "gradient_checkpointing": true,
|
| 129 |
+
0: "gradient_checkpointing_kwargs": {
|
| 130 |
+
0: "use_reentrant": true
|
| 131 |
+
0: },
|
| 132 |
+
0: "is_multimodal": true,
|
| 133 |
+
0: "learning_rate": 2e-06,
|
| 134 |
+
0: "lisa_layers_attribute": "model.layers",
|
| 135 |
+
0: "load_best_model_at_end": false,
|
| 136 |
+
0: "load_in_4bit": false,
|
| 137 |
+
0: "load_in_8bit": false,
|
| 138 |
+
0: "local_rank": 0,
|
| 139 |
+
0: "logging_steps": 10,
|
| 140 |
+
0: "lora_dropout": 0.0,
|
| 141 |
+
0: "loraplus_lr_embedding": 1e-06,
|
| 142 |
+
0: "lr_scheduler": "warmup_stable_decay",
|
| 143 |
+
0: "lr_scheduler_kwargs": {
|
| 144 |
+
0: "min_lr_ratio": 0.1,
|
| 145 |
+
0: "num_decay_steps": 200
|
| 146 |
+
0: },
|
| 147 |
+
0: "max_prompt_len": 512,
|
| 148 |
+
0: "mean_resizing_embeddings": false,
|
| 149 |
+
0: "micro_batch_size": 1,
|
| 150 |
+
0: "model_config_type": "gemma3",
|
| 151 |
+
0: "num_epochs": 0.6,
|
| 152 |
+
0: "optimizer": "adamw_torch_fused",
|
| 153 |
+
0: "output_dir": "/lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0",
|
| 154 |
+
0: "pad_to_sequence_len": true,
|
| 155 |
+
0: "pretrain_multipack_attn": true,
|
| 156 |
+
0: "pretrain_multipack_buffer_size": 10000,
|
| 157 |
+
0: "processor_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-12b",
|
| 158 |
+
0: "profiler_steps_start": 0,
|
| 159 |
+
0: "qlora_sharded_model_loading": false,
|
| 160 |
+
0: "ray_num_workers": 1,
|
| 161 |
+
0: "resources_per_worker": {
|
| 162 |
+
0: "GPU": 1
|
| 163 |
+
0: },
|
| 164 |
+
0: "sample_packing": true,
|
| 165 |
+
0: "sample_packing_bin_size": 200,
|
| 166 |
+
0: "sample_packing_group_size": 100000,
|
| 167 |
+
0: "save_only_model": true,
|
| 168 |
+
0: "save_safetensors": true,
|
| 169 |
+
0: "save_total_limit": 20,
|
| 170 |
+
0: "saves_per_epoch": 1,
|
| 171 |
+
0: "sequence_len": 16384,
|
| 172 |
+
0: "shuffle_before_merging_datasets": false,
|
| 173 |
+
0: "shuffle_merged_datasets": true,
|
| 174 |
+
0: "skip_prepare_dataset": false,
|
| 175 |
+
0: "strict": false,
|
| 176 |
+
0: "tensor_parallel_size": 1,
|
| 177 |
+
0: "tf32": false,
|
| 178 |
+
0: "tiled_mlp_use_original_mlp": true,
|
| 179 |
+
0: "tokenizer_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-27b",
|
| 180 |
+
0: "torch_dtype": "torch.bfloat16",
|
| 181 |
+
0: "train_on_inputs": false,
|
| 182 |
+
0: "trl": {
|
| 183 |
+
0: "log_completions": false,
|
| 184 |
+
0: "mask_truncated_completions": false,
|
| 185 |
+
0: "ref_model_mixup_alpha": 0.9,
|
| 186 |
+
0: "ref_model_sync_steps": 64,
|
| 187 |
+
0: "scale_rewards": true,
|
| 188 |
+
0: "sync_ref_model": false,
|
| 189 |
+
0: "use_vllm": false,
|
| 190 |
+
0: "vllm_server_host": "0.0.0.0",
|
| 191 |
+
0: "vllm_server_port": 8000
|
| 192 |
+
0: },
|
| 193 |
+
0: "use_ray": false,
|
| 194 |
+
0: "use_tensorboard": true,
|
| 195 |
+
0: "val_set_size": 0.0,
|
| 196 |
+
0: "vllm": {
|
| 197 |
+
0: "device": "auto",
|
| 198 |
+
0: "dtype": "auto",
|
| 199 |
+
0: "gpu_memory_utilization": 0.9,
|
| 200 |
+
0: "host": "0.0.0.0",
|
| 201 |
+
0: "port": 8000
|
| 202 |
+
0: },
|
| 203 |
+
0: "warmup_steps": 100,
|
| 204 |
+
0: "weight_decay": 0.0,
|
| 205 |
+
0: "world_size": 16
|
| 206 |
+
0: }[39m
|
| 207 |
+
0: [2025-11-24 00:08:40,026] [INFO] [axolotl.cli.checks.check_user_token:35] [PID:3081979] [RANK:0] Skipping HuggingFace token verification because HF_HUB_OFFLINE is set to True. Only local files will be used.[39m
|
| 208 |
+
0: [2025-11-24 00:08:41,217] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:472] [PID:3081979] [RANK:0] Loading prepared dataset from disk at /lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0/06698e902d3dba325ca34849b1dea5ea...[39m
|
| 209 |
+
0: [2025-11-24 00:09:14,927] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:436] [PID:3081979] [RANK:0] gather_len_batches: [18976, 18976, 18976, 18975, 18977, 18976, 18975, 18976, 18976, 18975, 18976, 18976, 18976, 18976, 18976, 18976][39m
|
| 210 |
+
0: [2025-11-24 00:09:14,950] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:495] [PID:3081979] [RANK:0] sample_packing_eff_est across ranks: [0.9989354014396667, 0.9988301396369934, 0.9989880323410034, 0.9988827705383301, 0.9988827705383301, 0.9988827705383301, 0.9989354014396667, 0.9989354014396667, 0.9989354014396667, 0.9988827705383301, 0.9989354014396667, 0.9988827705383301, 0.9988827705383301, 0.9988827705383301, 0.9988827705383301, 0.9989354014396667][39m
|
| 211 |
+
0: [2025-11-24 00:09:14,959] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:127] [PID:3081979] [RANK:0] Maximum number of steps set at 711[39m
|
| 212 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 213 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 214 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 215 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 216 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 217 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 218 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 219 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 220 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 221 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 222 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 223 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 224 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 225 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 226 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 227 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 228 |
+
0: [2025-11-24 00:09:22,718] [INFO] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:110] [PID:3081979] [RANK:0] Patched Trainer.evaluation_loop with nanmean loss calculation[39m
|
| 229 |
+
0: [2025-11-24 00:09:22,719] [INFO] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:164] [PID:3081979] [RANK:0] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation[39m
|
| 230 |
+
3:
|
| 231 |
+
1:
|
| 232 |
+
2:
|
| 233 |
+
0:
|
| 234 |
+
0: �█ | 3/5 [00:31<00:20, 10.27s/it]
|
| 235 |
+
3: �█ | 3/5 [00:31<00:20, 10.27s/it]
|
| 236 |
+
1: �█ | 3/5 [00:31<00:20, 10.27s/it]
|
| 237 |
+
1: s/it]
|
| 238 |
+
1:
|
| 239 |
+
3:
|
| 240 |
+
0:
|
| 241 |
+
1:
|
| 242 |
+
3:
|
| 243 |
+
0:
|
| 244 |
+
2: �█ | 3/5 [00:31<00:20, 10.27s/it]
|
| 245 |
+
3:
|
| 246 |
+
2:
|
| 247 |
+
2:
|
| 248 |
+
2:
|
| 249 |
+
1:
|
| 250 |
+
0:
|
| 251 |
+
0: [2025-11-24 00:10:19,017] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:345] [PID:3081979] [RANK:0] Converting modules to torch.bfloat16[39m
|
| 252 |
+
0: [2025-11-24 00:10:22,748] [INFO] [axolotl.train.save_initial_configs:416] [PID:3081979] [RANK:0] Pre-saving tokenizer to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0...[39m
|
| 253 |
+
0: [2025-11-24 00:10:23,317] [INFO] [axolotl.train.save_initial_configs:419] [PID:3081979] [RANK:0] Pre-saving model config to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0...[39m
|
| 254 |
+
0: [2025-11-24 00:10:23,327] [INFO] [axolotl.train.save_initial_configs:423] [PID:3081979] [RANK:0] Pre-saving processor to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0...[39m
|
| 255 |
+
0: [2025-11-24 00:10:26,392] [INFO] [axolotl.train.execute_training:203] [PID:3081979] [RANK:0] Starting trainer...[39m
|
| 256 |
+
0: [2025-11-24 00:11:58,358] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:436] [PID:3081979] [RANK:0] gather_len_batches: [18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976][39m
|
| 257 |
+
0: Parameter Offload - Persistent parameters statistics: param_count = 563, numel = 1166448
|
| 258 |
+
0: {'loss': 0.6182, 'grad_norm': 2.8189747023390073, 'learning_rate': 3.62e-07, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.01}
|
| 259 |
+
0:
|
| 260 |
0%| | 0/711 [00:00<?, ?it/s]
|
| 261 |
0%| | 1/711 [03:14<38:20:16, 194.39s/it]
|
| 262 |
0%| | 2/711 [03:19<16:22:13, 83.12s/it]
|
| 263 |
0%| | 3/711 [03:24<9:20:24, 47.49s/it]
|
| 264 |
1%| | 4/711 [03:29<6:02:21, 30.75s/it]
|
| 265 |
1%| | 5/711 [03:35<4:14:24, 21.62s/it]
|
| 266 |
1%| | 6/711 [03:40<3:09:35, 16.14s/it]
|
| 267 |
1%| | 7/711 [03:45<2:27:08, 12.54s/it]
|
| 268 |
1%| | 8/711 [03:50<1:59:06, 10.17s/it]
|
| 269 |
1%|▏ | 9/711 [03:56<1:40:22, 8.58s/it]
|
| 270 |
1%|▏ | 10/711 [04:01<1:27:41, 7.51s/it]
|
| 271 |
|
| 272 |
1%|▏ | 10/711 [04:01<1:27:41, 7.51s/it]
|
| 273 |
2%|▏ | 11/711 [04:06<1:18:52, 6.76s/it]
|
| 274 |
2%|▏ | 12/711 [04:11<1:12:59, 6.27s/it]
|
| 275 |
2%|▏ | 13/711 [04:16<1:08:45, 5.91s/it]
|
| 276 |
2%|▏ | 14/711 [04:21<1:06:01, 5.68s/it]
|
| 277 |
2%|▏ | 15/711 [04:26<1:03:51, 5.51s/it]
|
| 278 |
2%|▏ | 16/711 [04:31<1:02:15, 5.38s/it]
|
| 279 |
2%|▏ | 17/711 [04:37<1:01:45, 5.34s/i
|
| 280 |
+
0: {'loss': 0.5822, 'grad_norm': 1.7276350224873818, 'learning_rate': 5.420000000000001e-07, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.02}
|
| 281 |
+
0: {'loss': 0.5571, 'grad_norm': 2.161001413543057, 'learning_rate': 7.219999999999999e-07, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.03}
|
| 282 |
+
0: t]
|
| 283 |
3%|▎ | 18/711 [04:42<1:01:12, 5.30s/it]
|
| 284 |
3%|▎ | 19/711 [04:47<1:00:37, 5.26s/it]
|
| 285 |
3%|▎ | 20/711 [04:52<1:00:10, 5.23s/it]
|
| 286 |
|
| 287 |
3%|▎ | 20/711 [04:52<1:00:10, 5.23s/it]
|
| 288 |
3%|▎ | 21/711 [04:57<59:44, 5.19s/it]
|
| 289 |
3%|▎ | 22/711 [05:02<59:22, 5.17s/it]
|
| 290 |
3%|▎ | 23/711 [05:07<59:00, 5.15s/it]
|
| 291 |
3%|▎ | 24/711 [05:13<58:57, 5.15s/it]
|
| 292 |
4%|▎ | 25/711 [05:18<59:09, 5.17s/it]
|
| 293 |
4%|▎ | 26/711 [05:23<58:51, 5.16s/it]
|
| 294 |
4%|▍ | 27/711 [05:28<58:27, 5.13s/it]
|
| 295 |
4%|▍ | 28/711 [05:33<58:17, 5.12s/it]
|
| 296 |
4%|▍ | 29/711 [05:38<58:07, 5.11s/it]
|
| 297 |
4%|▍ | 30/711 [05:44<59:10, 5.21s/it]
|
| 298 |
|
| 299 |
4%|▍ | 30/711 [05:44<59:10, 5.21s/it]
|
| 300 |
4%|▍ | 31/711 [05:49<58:50, 5.19s/it]
|
| 301 |
5%|▍ | 32/711 [05:54<58:36, 5.18s/it]
|
| 302 |
5%|▍ | 33/711 [05:59<59:31, 5
|
| 303 |
+
0: {'loss': 0.5218, 'grad_norm': 1.0906530849609661, 'learning_rate': 9.020000000000001e-07, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.03}
|
| 304 |
+
0: .27s/it]
|
| 305 |
5%|▍ | 34/711 [06:04<58:50, 5.22s/it]
|
| 306 |
5%|▍ | 35/711 [06:10<58:21, 5.18s/it]
|
| 307 |
5%|▌ | 36/711 [06:15<57:59, 5.16s/it]
|
| 308 |
5%|▌ | 37/711 [06:20<57:35, 5.13s/it]
|
| 309 |
5%|▌ | 38/711 [06:25<57:22, 5.12s/it]
|
| 310 |
5%|▌ | 39/711 [06:30<57:16, 5.11s/it]
|
| 311 |
6%|▌ | 40/711 [06:35<57:20, 5.13s/it]
|
| 312 |
|
| 313 |
6%|▌ | 40/711 [06:35<57:20, 5.13s/it]
|
| 314 |
6%|▌ | 41/711 [06:40<57:23, 5.14s/it]
|
| 315 |
6%|▌ | 42/711 [06:45<57:09, 5.13s/it]
|
| 316 |
6%|▌ | 43/711 [06:51<57:31, 5.17s/it]
|
| 317 |
6%|▌ | 44/711 [06:56<57:25, 5.17s/it]
|
| 318 |
6%|▋ | 45/711 [07:01<57:08, 5.15s/it]
|
| 319 |
6%|▋ | 46/711 [07:06<57:01, 5.15s/it]
|
| 320 |
7%|▋ | 47/711 [07:11<56:53, 5.14s/it]
|
| 321 |
7%|▋ | 48/711 [07:16<57:06, 5.17s/it]
|
| 322 |
7%|▋ | 49/711 [07:22<57:03, 5.17s/it]
|
| 323 |
7%|▋ | 50/711 [07:27<57:26, 5.21s/it]
|
| 324 |
|
| 325 |
+
0: {'loss': 0.4959, 'grad_norm': 0.8804401875885586, 'learning_rate': 1.082e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.04}
|
| 326 |
+
0: {'loss': 0.4729, 'grad_norm': 1.7572079203155466, 'learning_rate': 1.262e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.05}
|
| 327 |
+
0:
|
| 328 |
7%|▋ | 50/711 [07:27<57:26, 5.21s/it]
|
| 329 |
7%|▋ | 51/711 [07:32<57:06, 5.19s/it]
|
| 330 |
7%|▋ | 52/711 [07:37<57:02, 5.19s/it]
|
| 331 |
7%|▋ | 53/711 [07:43<57:29, 5.24s/it]
|
| 332 |
8%|▊ | 54/711 [07:48<58:04, 5.30s/it]
|
| 333 |
8%|▊ | 55/711 [07:53<57:20, 5.25s/it]
|
| 334 |
8%|▊ | 56/711 [07:59<57:54, 5.30s/it]
|
| 335 |
8%|▊ | 57/711 [08:04<57:10, 5.25s/it]
|
| 336 |
8%|▊ | 58/711 [08:09<57:55, 5.32s/it]
|
| 337 |
8%|▊ | 59/711 [08:14<57:11, 5.26s/it]
|
| 338 |
8%|▊ | 60/711 [08:19<56:30, 5.21s/it]
|
| 339 |
|
| 340 |
8%|▊ | 60/711 [08:19<56:30, 5.21s/it]
|
| 341 |
9%|▊ | 61/711 [08:25<56:35, 5.22s/it]
|
| 342 |
9%|▊ | 62/711 [08:30<56:18, 5.21s/it]
|
| 343 |
9%|▉ | 63/711 [08:35<57:08, 5.29s/it]
|
| 344 |
9%|▉ | 64/711 [08:41<56:59, 5.28s/it]
|
| 345 |
9%|▉ | 65/711 [08:46<56:22, 5.24s/it]
|
| 346 |
9%|▉ | 66/711 [08:51<56:36, 5.27s/it]
|
| 347 |
9%|▉ | 67/711 [08:56<56:06, 5.23s/it]
|
| 348 |
1
|
| 349 |
+
0: {'loss': 0.4737, 'grad_norm': 1.0832691281380182, 'learning_rate': 1.442e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.06}
|
| 350 |
+
0: {'loss': 0.4648, 'grad_norm': 0.9351167776948649, 'learning_rate': 1.622e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.07}
|
| 351 |
+
0: 0%|▉ | 68/711 [09:01<55:32, 5.18s/it]
|
| 352 |
10%|▉ | 69/711 [09:06<55:12, 5.16s/it]
|
| 353 |
10%|▉ | 70/711 [09:12<55:17, 5.18s/it]
|
| 354 |
|
| 355 |
10%|▉ | 70/711 [09:12<55:17, 5.18s/it]
|
| 356 |
10%|▉ | 71/711 [09:17<54:55, 5.15s/it]
|
| 357 |
10%|█ | 72/711 [09:22<54:39, 5.13s/it]
|
| 358 |
10%|█ | 73/711 [09:27<54:25, 5.12s/it]
|
| 359 |
10%|█ | 74/711 [09:32<54:18, 5.12s/it]
|
| 360 |
11%|█ | 75/711 [09:37<54:05, 5.10s/it]
|
| 361 |
11%|█ | 76/711 [09:42<54:01, 5.11s/it]
|
| 362 |
11%|█ | 77/711 [09:47<53:56, 5.11s/it]
|
| 363 |
11%|█ | 78/711 [09:52<54:06, 5.13s/it]
|
| 364 |
11%|█ | 79/711 [09:57<53:56, 5.12s/it]
|
| 365 |
11%|█▏ | 80/711 [10:03<54:04, 5.14s/it]
|
| 366 |
|
| 367 |
11%|█▏ | 80/711 [10:03<54:04, 5.14s/it]
|
| 368 |
11%|█▏ | 81/711 [10:08<55:07, 5.25s/it]
|
| 369 |
12%|█▏ | 82/711 [10:13<54:50, 5.23s/it]
|
| 370 |
12%|█▏ | 83/711 [10:19<55:29, 5.30s/it
|
| 371 |
+
0: {'loss': 0.4437, 'grad_norm': 1.0944333242533355, 'learning_rate': 1.802e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.08}
|
| 372 |
+
0: ]
|
| 373 |
12%|█▏ | 84/711 [10:24<54:57, 5.26s/it]
|
| 374 |
12%|█▏ | 85/711 [10:29<54:21, 5.21s/it]
|
| 375 |
12%|█▏ | 86/711 [10:35<55:05, 5.29s/it]
|
| 376 |
12%|█▏ | 87/711 [10:40<54:33, 5.25s/it]
|
| 377 |
12%|█▏ | 88/711 [10:45<54:02, 5.20s/it]
|
| 378 |
13%|█▎ | 89/711 [10:50<53:35, 5.17s/it]
|
| 379 |
13%|█▎ | 90/711 [10:55<53:14, 5.14s/it]
|
| 380 |
|
| 381 |
13%|█▎ | 90/711 [10:55<53:14, 5.14s/it]
|
| 382 |
13%|█▎ | 91/711 [11:00<52:59, 5.13s/it]
|
| 383 |
13%|█▎ | 92/711 [11:05<53:42, 5.21s/it]
|
| 384 |
13%|█▎ | 93/711 [11:11<54:32, 5.30s/it]
|
| 385 |
13%|█▎ | 94/711 [11:16<54:50, 5.33s/it]
|
| 386 |
13%|█▎ | 95/711 [11:22<55:36, 5.42s/it]
|
| 387 |
14%|█▎ | 96/711 [11:27<55:11, 5.38s/it]
|
| 388 |
14%|█▎ | 97/711 [11:32<54:13, 5.30s/it]
|
| 389 |
14%|█▍ | 98/711 [11:38<54:39, 5.35s/it]
|
| 390 |
14%|█▍ | 99/711 [11:43<53:58, 5.29s/it]
|
| 391 |
14%|█▍ | 100/711 [11:48<53:15, 5.23s/it]
|
| 392 |
|
| 393 |
+
0: {'loss': 0.4312, 'grad_norm': 0.821415164120209, 'learning_rate': 1.982e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.08}
|
| 394 |
+
0: {'loss': 0.4519, 'grad_norm': 1.098049116364939, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.09}
|
| 395 |
+
0:
|
| 396 |
14%|█▍ | 100/711 [11:48<53:15, 5.23s/it]
|
| 397 |
14%|█▍ | 101/711 [11:53<52:46, 5.19s/it]
|
| 398 |
14%|█▍ | 102/711 [11:58<52:21, 5.16s/it]
|
| 399 |
14%|█▍ | 103/711 [12:03<52:00, 5.13s/it]
|
| 400 |
15%|█▍ | 104/711 [12:08<51:44, 5.11s/it]
|
| 401 |
15%|█▍ | 105/711 [12:14<51:39, 5.11s/it]
|
| 402 |
15%|█▍ | 106/711 [12:19<51:43, 5.13s/it]
|
| 403 |
15%|█▌ | 107/711 [12:24<51:34, 5.12s/it]
|
| 404 |
15%|█▌ | 108/711 [12:29<51:22, 5.11s/it]
|
| 405 |
15%|█▌ | 109/711 [12:34<51:11, 5.10s/it]
|
| 406 |
15%|█▌ | 110/711 [12:39<51:14, 5.12s/it]
|
| 407 |
|
| 408 |
15%|█▌ | 110/711 [12:39<51:14, 5.12s/it]
|
| 409 |
16%|█▌ | 111/711 [12:44<51:43, 5.17s/it]
|
| 410 |
16%|█▌ | 112/711 [12:50<51:33, 5.16s/it]
|
| 411 |
16%|█▌ | 113/711 [12:55<52:08, 5.23s/it]
|
| 412 |
16%|█▌ | 114/711 [13:00<51:48, 5.21s/it]
|
| 413 |
16%|█▌ | 115/711 [13:05<51:23, 5.17s/it]
|
| 414 |
16%|█▋ |
|
| 415 |
+
0: {'loss': 0.4418, 'grad_norm': 0.8654847799165983, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.1}
|
| 416 |
+
0: {'loss': 0.4272, 'grad_norm': 0.8743836149172823, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.11}
|
| 417 |
+
0: 116/711 [13:10<51:17, 5.17s/it]
|
| 418 |
16%|█▋ | 117/711 [13:16<51:03, 5.16s/it]
|
| 419 |
17%|█▋ | 118/711 [13:21<50:59, 5.16s/it]
|
| 420 |
17%|█▋ | 119/711 [13:26<51:20, 5.20s/it]
|
| 421 |
17%|█▋ | 120/711 [13:31<50:54, 5.17s/it]
|
| 422 |
|
| 423 |
17%|█▋ | 120/711 [13:31<50:54, 5.17s/it]
|
| 424 |
17%|█▋ | 121/711 [13:36<50:42, 5.16s/it]
|
| 425 |
17%|█▋ | 122/711 [13:41<50:37, 5.16s/it]
|
| 426 |
17%|█▋ | 123/711 [13:47<50:33, 5.16s/it]
|
| 427 |
17%|█▋ | 124/711 [13:52<50:44, 5.19s/it]
|
| 428 |
18%|█▊ | 125/711 [13:57<51:31, 5.28s/it]
|
| 429 |
18%|█▊ | 126/711 [14:03<52:11, 5.35s/it]
|
| 430 |
18%|█▊ | 127/711 [14:08<52:12, 5.36s/it]
|
| 431 |
18%|█▊ | 128/711 [14:14<52:10, 5.37s/it]
|
| 432 |
18%|█▊ | 129/711 [14:19<51:21, 5.29s/it]
|
| 433 |
18%|█▊ | 130/711 [14:24<50:50, 5.25s/it]
|
| 434 |
|
| 435 |
18%|█▊ | 130/711 [14:24<50:50, 5.25s/it]
|
| 436 |
18%|█▊ | 131/
|
| 437 |
+
0: {'loss': 0.4317, 'grad_norm': 0.886837889977122, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.12}
|
| 438 |
+
0: 711 [14:29<50:24, 5.22s/it]
|
| 439 |
19%|█▊ | 132/711 [14:34<50:04, 5.19s/it]
|
| 440 |
19%|█▊ | 133/711 [14:39<50:21, 5.23s/it]
|
| 441 |
19%|█▉ | 134/711 [14:45<49:58, 5.20s/it]
|
| 442 |
19%|█▉ | 135/711 [14:50<49:37, 5.17s/it]
|
| 443 |
19%|█▉ | 136/711 [14:55<49:54, 5.21s/it]
|
| 444 |
19%|█▉ | 137/711 [15:00<49:29, 5.17s/it]
|
| 445 |
19%|█▉ | 138/711 [15:05<49:14, 5.16s/it]
|
| 446 |
20%|█▉ | 139/711 [15:10<49:05, 5.15s/it]
|
| 447 |
20%|█▉ | 140/711 [15:16<49:19, 5.18s/it]
|
| 448 |
|
| 449 |
20%|█▉ | 140/711 [15:16<49:19, 5.18s/it]
|
| 450 |
20%|█▉ | 141/711 [15:21<49:04, 5.17s/it]
|
| 451 |
20%|█▉ | 142/711 [15:26<48:43, 5.14s/it]
|
| 452 |
20%|██ | 143/711 [15:31<48:31, 5.13s/it]
|
| 453 |
20%|██ | 144/711 [15:36<48:59, 5.19s/it]
|
| 454 |
20%|██ | 145/711 [15:41<48:54, 5.19s/it]
|
| 455 |
21%|██ | 146/711 [15:46<48:39, 5.17s/it]
|
| 456 |
21%|██ | 147/711 [15:52<48:22, 5.15s/it]
|
| 457 |
21%|██ | 148/7
|
| 458 |
+
0: {'loss': 0.4309, 'grad_norm': 1.0717708423744885, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.13}
|
| 459 |
+
0: {'loss': 0.4316, 'grad_norm': 0.8573484702226136, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.13}
|
| 460 |
+
0: 11 [15:57<48:11, 5.14s/it]
|
| 461 |
21%|██ | 149/711 [16:02<48:06, 5.14s/it]
|
| 462 |
21%|██ | 150/711 [16:07<48:06, 5.14s/it]
|
| 463 |
|
| 464 |
21%|██ | 150/711 [16:07<48:06, 5.14s/it]
|
| 465 |
21%|██ | 151/711 [16:12<48:00, 5.14s/it]
|
| 466 |
21%|██▏ | 152/711 [16:17<48:35, 5.22s/it]
|
| 467 |
22%|██▏ | 153/711 [16:23<48:21, 5.20s/it]
|
| 468 |
22%|██▏ | 154/711 [16:28<48:00, 5.17s/it]
|
| 469 |
22%|██▏ | 155/711 [16:33<47:54, 5.17s/it]
|
| 470 |
22%|██▏ | 156/711 [16:38<47:42, 5.16s/it]
|
| 471 |
22%|██▏ | 157/711 [16:43<47:36, 5.16s/it]
|
| 472 |
22%|██▏ | 158/711 [16:48<47:27, 5.15s/it]
|
| 473 |
22%|██▏ | 159/711 [16:54<48:09, 5.23s/it]
|
| 474 |
23%|██▎ | 160/711 [16:59<47:40, 5.19s/it]
|
| 475 |
|
| 476 |
23%|██▎ | 160/711 [16:59<47:40, 5.19s/it]
|
| 477 |
23%|██▎ | 161/711 [17:04<47:17, 5.16s/it]
|
| 478 |
23%|██▎ | 162/711 [17:09<47:17, 5.17s/it]
|
| 479 |
23%|�
|
| 480 |
+
0: {'loss': 0.4239, 'grad_norm': 0.8728825320101697, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.14}
|
| 481 |
+
0: ��█▎ | 163/711 [17:14<46:56, 5.14s/it]
|
| 482 |
23%|██▎ | 164/711 [17:20<47:49, 5.25s/it]
|
| 483 |
23%|██▎ | 165/711 [17:25<47:18, 5.20s/it]
|
| 484 |
23%|██▎ | 166/711 [17:30<47:54, 5.27s/it]
|
| 485 |
23%|██▎ | 167/711 [17:35<47:21, 5.22s/it]
|
| 486 |
24%|██▎ | 168/711 [17:41<47:10, 5.21s/it]
|
| 487 |
24%|██▍ | 169/711 [17:46<46:57, 5.20s/it]
|
| 488 |
24%|██▍ | 170/711 [17:51<46:38, 5.17s/it]
|
| 489 |
|
| 490 |
24%|██▍ | 170/711 [17:51<46:38, 5.17s/it]
|
| 491 |
24%|██▍ | 171/711 [17:56<46:21, 5.15s/it]
|
| 492 |
24%|██▍ | 172/711 [18:01<46:17, 5.15s/it]
|
| 493 |
24%|██▍ | 173/711 [18:06<46:18, 5.16s/it]
|
| 494 |
24%|██▍ | 174/711 [18:11<46:01, 5.14s/it]
|
| 495 |
25%|██▍ | 175/711 [18:16<45:50, 5.13s/it]
|
| 496 |
25%|██▍ | 176/711 [18:22<45:49, 5.14s/it]
|
| 497 |
25%|██▍ | 177/711 [18:27<45:49, 5.15s/it]
|
| 498 |
25%|██▌ | 178/711 [18:32<46:45, 5.26s/it]
|
| 499 |
25%|██▌ | 179/
|
| 500 |
+
0: {'loss': 0.4173, 'grad_norm': 2.470513995230686, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.15}
|
| 501 |
+
0: {'loss': 0.4151, 'grad_norm': 0.9038938137872402, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.16}
|
| 502 |
+
0: 711 [18:37<46:12, 5.21s/it]
|
| 503 |
25%|██▌ | 180/711 [18:43<45:49, 5.18s/it]
|
| 504 |
|
| 505 |
25%|██▌ | 180/711 [18:43<45:49, 5.18s/it]
|
| 506 |
25%|██▌ | 181/711 [18:48<45:33, 5.16s/it]
|
| 507 |
26%|██▌ | 182/711 [18:53<45:22, 5.15s/it]
|
| 508 |
26%|██▌ | 183/711 [18:58<45:10, 5.13s/it]
|
| 509 |
26%|██▌ | 184/711 [19:03<45:33, 5.19s/it]
|
| 510 |
26%|██▌ | 185/711 [19:08<45:10, 5.15s/it]
|
| 511 |
26%|██▌ | 186/711 [19:13<44:56, 5.14s/it]
|
| 512 |
26%|██▋ | 187/711 [19:18<44:44, 5.12s/it]
|
| 513 |
26%|██▋ | 188/711 [19:24<45:35, 5.23s/it]
|
| 514 |
27%|██▋ | 189/711 [19:29<45:10, 5.19s/it]
|
| 515 |
27%|██▋ | 190/711 [19:34<44:59, 5.18s/it]
|
| 516 |
|
| 517 |
27%|██▋ | 190/711 [19:34<44:59, 5.18s/it]
|
| 518 |
27%|██▋ | 191/711 [19:39<44:41, 5.16s/it]
|
| 519 |
27%|██▋ | 192/711 [19:44<44:29, 5.14s/it]
|
| 520 |
27%|██▋ | 193/711 [19:50<44:25, 5.15s/i
|
| 521 |
+
0: {'loss': 0.4194, 'grad_norm': 2.3527260378633015, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.17}
|
| 522 |
+
0: t]
|
| 523 |
27%|██▋ | 194/711 [19:55<44:10, 5.13s/it]
|
| 524 |
27%|██▋ | 195/711 [20:00<44:25, 5.17s/it]
|
| 525 |
28%|██▊ | 196/711 [20:05<44:10, 5.15s/it]
|
| 526 |
28%|██▊ | 197/711 [20:10<44:27, 5.19s/it]
|
| 527 |
28%|██▊ | 198/711 [20:16<45:08, 5.28s/it]
|
| 528 |
28%|██▊ | 199/711 [20:21<44:34, 5.22s/it]
|
| 529 |
28%|██▊ | 200/711 [20:26<44:17, 5.20s/it]
|
| 530 |
|
| 531 |
28%|██▊ | 200/711 [20:26<44:17, 5.20s/it]
|
| 532 |
28%|██▊ | 201/711 [20:31<43:55, 5.17s/it]
|
| 533 |
28%|██▊ | 202/711 [20:36<43:35, 5.14s/it]
|
| 534 |
29%|██▊ | 203/711 [20:41<43:37, 5.15s/it]
|
| 535 |
29%|██▊ | 204/711 [20:47<44:14, 5.24s/it]
|
| 536 |
29%|██▉ | 205/711 [20:52<44:01, 5.22s/it]
|
| 537 |
29%|██▉ | 206/711 [20:57<43:43, 5.20s/it]
|
| 538 |
29%|██▉ | 207/711 [21:02<43:30, 5.18s/it]
|
| 539 |
29%|██▉ | 208/711 [21:07<43:14, 5.16s/it]
|
| 540 |
29%|██▉ | 209/711 [21:12<43:02, 5.15s/it]
|
| 541 |
30%|██▉
|
| 542 |
+
0: {'loss': 0.413, 'grad_norm': 0.893185793908122, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.18}
|
| 543 |
+
0: {'loss': 0.4217, 'grad_norm': 1.160958862723743, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.19}
|
| 544 |
+
0: | 210/711 [21:18<42:50, 5.13s/it]
|
| 545 |
|
| 546 |
30%|██▉ | 210/711 [21:18<42:50, 5.13s/it]
|
| 547 |
30%|██▉ | 211/711 [21:23<42:40, 5.12s/it]
|
| 548 |
30%|██▉ | 212/711 [21:28<42:38, 5.13s/it]
|
| 549 |
30%|██▉ | 213/711 [21:33<42:38, 5.14s/it]
|
| 550 |
30%|███ | 214/711 [21:38<42:36, 5.14s/it]
|
| 551 |
30%|███ | 215/711 [21:43<42:24, 5.13s/it]
|
| 552 |
30%|███ | 216/711 [21:48<42:15, 5.12s/it]
|
| 553 |
31%|███ | 217/711 [21:53<42:11, 5.12s/it]
|
| 554 |
31%|███ | 218/711 [21:59<42:01, 5.12s/it]
|
| 555 |
31%|███ | 219/711 [22:04<42:01, 5.12s/it]
|
| 556 |
31%|███ | 220/711 [22:09<41:50, 5.11s/it]
|
| 557 |
|
| 558 |
31%|███ | 220/711 [22:09<41:50, 5.11s/it]
|
| 559 |
31%|███ | 221/711 [22:14<42:03, 5.15s/it]
|
| 560 |
31%|███ | 222/711 [22:19<42:36, 5.23s/it]
|
| 561 |
31%|███▏ | 223/711 [22:25<42:39, 5.25s/it]
|
| 562 |
32%|███▏ | 224/711 [22:30<42
|
| 563 |
+
0: {'loss': 0.4102, 'grad_norm': 0.8461972280700218, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.19}
|
| 564 |
+
0: :22, 5.22s/it]
|
| 565 |
32%|███▏ | 225/711 [22:35<42:13, 5.21s/it]
|
| 566 |
32%|███▏ | 226/711 [22:40<42:40, 5.28s/it]
|
| 567 |
32%|███▏ | 227/711 [22:46<42:11, 5.23s/it]
|
| 568 |
32%|███▏ | 228/711 [22:51<41:45, 5.19s/it]
|
| 569 |
32%|███▏ | 229/711 [22:56<41:33, 5.17s/it]
|
| 570 |
32%|███▏ | 230/711 [23:01<41:16, 5.15s/it]
|
| 571 |
|
| 572 |
32%|███▏ | 230/711 [23:01<41:16, 5.15s/it]
|
| 573 |
32%|███▏ | 231/711 [23:06<41:02, 5.13s/it]
|
| 574 |
33%|███▎ | 232/711 [23:11<40:52, 5.12s/it]
|
| 575 |
33%|███▎ | 233/711 [23:16<40:57, 5.14s/it]
|
| 576 |
33%|███▎ | 234/711 [23:21<40:58, 5.15s/it]
|
| 577 |
33%|███▎ | 235/711 [23:27<40:52, 5.15s/it]
|
| 578 |
33%|███▎ | 236/711 [23:32<40:41, 5.14s/it]
|
| 579 |
33%|███▎ | 237/711 [23:37<40:32, 5.13s/it]
|
| 580 |
33%|███▎ | 238/711 [23:42<40:21, 5.12s/it]
|
| 581 |
34%|███▎ | 239/711 [23:47<40:22, 5.13s/it]
|
| 582 |
34%|███▍ | 240/
|
| 583 |
+
0: {'loss': 0.4155, 'grad_norm': 0.8314605620161184, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 67.99, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.2}
|
| 584 |
+
0: {'loss': 0.4045, 'grad_norm': 0.8328744939735258, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.21}
|
| 585 |
+
0: 711 [23:52<40:14, 5.13s/it]
|
| 586 |
|
| 587 |
34%|███▍ | 240/711 [23:52<40:14, 5.13s/it]
|
| 588 |
34%|███▍ | 241/711 [23:57<40:04, 5.12s/it]
|
| 589 |
34%|███▍ | 242/711 [24:03<40:30, 5.18s/it]
|
| 590 |
34%|███▍ | 243/711 [24:08<40:55, 5.25s/it]
|
| 591 |
34%|███▍ | 244/711 [24:13<40:38, 5.22s/it]
|
| 592 |
34%|███▍ | 245/711 [24:18<40:16, 5.18s/it]
|
| 593 |
35%|███▍ | 246/711 [24:23<40:06, 5.18s/it]
|
| 594 |
35%|███▍ | 247/711 [24:29<40:20, 5.22s/it]
|
| 595 |
35%|███▍ | 248/711 [24:34<39:58, 5.18s/it]
|
| 596 |
35%|███▌ | 249/711 [24:39<39:40, 5.15s/it]
|
| 597 |
35%|███▌ | 250/711 [24:44<39:34, 5.15s/it]
|
| 598 |
|
| 599 |
35%|███▌ | 250/711 [24:44<39:34, 5.15s/it]
|
| 600 |
35%|███▌ | 251/711 [24:49<39:23, 5.14s/it]
|
| 601 |
35%|███▌ | 252/711 [24:54<39:08, 5.12s/it]
|
| 602 |
36%|███▌ | 253/711 [24:59<39:13, 5.14s/it]
|
| 603 |
36%|███▌
|
| 604 |
+
0: {'loss': 0.4005, 'grad_norm': 0.8810433727017853, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.22}
|
| 605 |
+
0: | 254/711 [25:05<39:10, 5.14s/it]
|
| 606 |
36%|███▌ | 255/711 [25:10<39:07, 5.15s/it]
|
| 607 |
36%|███▌ | 256/711 [25:15<39:17, 5.18s/it]
|
| 608 |
36%|███▌ | 257/711 [25:20<38:57, 5.15s/it]
|
| 609 |
36%|███▋ | 258/711 [25:26<39:48, 5.27s/it]
|
| 610 |
36%|███▋ | 259/711 [25:31<39:17, 5.22s/it]
|
| 611 |
37%|███▋ | 260/711 [25:36<38:58, 5.19s/it]
|
| 612 |
|
| 613 |
37%|███▋ | 260/711 [25:36<38:58, 5.19s/it]
|
| 614 |
37%|███▋ | 261/711 [25:41<38:40, 5.16s/it]
|
| 615 |
37%|███▋ | 262/711 [25:46<38:26, 5.14s/it]
|
| 616 |
37%|███▋ | 263/711 [25:51<38:26, 5.15s/it]
|
| 617 |
37%|███▋ | 264/711 [25:56<38:28, 5.16s/it]
|
| 618 |
37%|███▋ | 265/711 [26:02<38:16, 5.15s/it]
|
| 619 |
37%|███▋ | 266/711 [26:07<38:12, 5.15s/it]
|
| 620 |
38%|███▊ | 267/711 [26:12<38:50, 5.25s/it]
|
| 621 |
38%|███▊ | 268/711 [26:18<39:17, 5.32s/it]
|
| 622 |
38%|███▊ | 269/711 [26:23<38:44, 5.26s/it]
|
| 623 |
38%|█�
|
| 624 |
+
0: {'loss': 0.4021, 'grad_norm': 1.0060252029086465, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.23}
|
| 625 |
+
0: [2025-11-24 00:38:52,072] [WARNING] [stage3.py:2150:step] 2 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
| 626 |
+
0: {'loss': 0.4124, 'grad_norm': 0.9014415482740915, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.24}
|
| 627 |
+
0: �█▊ | 270/711 [26:28<38:17, 5.21s/it]
|
| 628 |
|
| 629 |
38%|███▊ | 270/711 [26:28<38:17, 5.21s/it]
|
| 630 |
38%|███▊ | 271/711 [26:33<37:58, 5.18s/it]
|
| 631 |
38%|███▊ | 272/711 [26:38<37:42, 5.15s/it]
|
| 632 |
38%|███▊ | 273/711 [26:43<37:32, 5.14s/it]
|
| 633 |
39%|███▊ | 274/711 [26:49<39:58, 5.49s/it]
|
| 634 |
39%|███▊ | 275/711 [26:55<39:08, 5.39s/it]
|
| 635 |
39%|███▉ | 276/711 [27:00<38:28, 5.31s/it]
|
| 636 |
39%|███▉ | 277/711 [27:05<38:44, 5.36s/it]
|
| 637 |
39%|███▉ | 278/711 [27:10<38:16, 5.30s/it]
|
| 638 |
39%|███▉ | 279/711 [27:16<37:45, 5.24s/it]
|
| 639 |
39%|███▉ | 280/711 [27:21<37:26, 5.21s/it]
|
| 640 |
|
| 641 |
39%|███▉ | 280/711 [27:21<37:26, 5.21s/it]
|
| 642 |
40%|███▉ | 281/711 [27:26<38:16, 5.34s/it]
|
| 643 |
40%|███▉ | 282/711 [27:31<37:40, 5.27s/it]
|
| 644 |
40%|███▉ | 283/711 [27:37<37:37, 5.28s/it]
|
| 645 |
40%
|
| 646 |
+
0: {'loss': 0.3928, 'grad_norm': 1.1303634088009527, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.24}
|
| 647 |
+
0: |███▉ | 284/711 [27:42<37:57, 5.33s/it]
|
| 648 |
40%|████ | 285/711 [27:47<37:34, 5.29s/it]
|
| 649 |
40%|████ | 286/711 [27:53<37:14, 5.26s/it]
|
| 650 |
40%|████ | 287/711 [27:58<37:31, 5.31s/it]
|
| 651 |
41%|████ | 288/711 [28:03<37:04, 5.26s/it]
|
| 652 |
41%|████ | 289/711 [28:08<36:40, 5.21s/it]
|
| 653 |
41%|████ | 290/711 [28:13<36:19, 5.18s/it]
|
| 654 |
|
| 655 |
41%|████ | 290/711 [28:13<36:19, 5.18s/it]
|
| 656 |
41%|████ | 291/711 [28:19<36:32, 5.22s/it]
|
| 657 |
41%|████ | 292/711 [28:24<36:57, 5.29s/it]
|
| 658 |
41%|████ | 293/711 [28:29<36:57, 5.31s/it]
|
| 659 |
41%|████▏ | 294/711 [28:35<36:36, 5.27s/it]
|
| 660 |
41%|████▏ | 295/711 [28:40<36:18, 5.24s/it]
|
| 661 |
42%|████▏ | 296/711 [28:45<35:59, 5.20s/it]
|
| 662 |
42%|████▏ | 297/711 [28:50<36:22, 5.27s/it]
|
| 663 |
42%|████▏ | 298/711 [28:55<36:02, 5.24s/it]
|
| 664 |
42%|████▏ | 299/711 [29:
|
| 665 |
+
0: {'loss': 0.4025, 'grad_norm': 0.8789278025527175, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.25}
|
| 666 |
+
0: {'loss': 0.4015, 'grad_norm': 0.7615557087401322, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.26}
|
| 667 |
+
0: 01<35:58, 5.24s/it]
|
| 668 |
42%|████▏ | 300/711 [29:06<36:01, 5.26s/it]
|
| 669 |
|
| 670 |
42%|████▏ | 300/711 [29:06<36:01, 5.26s/it]
|
| 671 |
42%|████▏ | 301/711 [29:11<35:42, 5.23s/it]
|
| 672 |
42%|████▏ | 302/711 [29:16<35:28, 5.20s/it]
|
| 673 |
43%|████▎ | 303/711 [29:21<35:11, 5.17s/it]
|
| 674 |
43%|████▎ | 304/711 [29:27<35:03, 5.17s/it]
|
| 675 |
43%|████▎ | 305/711 [29:32<35:16, 5.21s/it]
|
| 676 |
43%|████▎ | 306/711 [29:37<34:57, 5.18s/it]
|
| 677 |
43%|████▎ | 307/711 [29:42<34:42, 5.15s/it]
|
| 678 |
43%|████▎ | 308/711 [29:47<34:33, 5.15s/it]
|
| 679 |
43%|████▎ | 309/711 [29:53<35:23, 5.28s/it]
|
| 680 |
44%|████▎ | 310/711 [29:59<36:23, 5.45s/it]
|
| 681 |
|
| 682 |
44%|████▎ | 310/711 [29:59<36:23, 5.45s/it]
|
| 683 |
44%|████▎ | 311/711 [30:04<35:40, 5.35s/it]
|
| 684 |
44%|████▍ | 312/711 [30:09<35:47, 5.38s/it]
|
| 685 |
|
| 686 |
+
0: {'loss': 0.4047, 'grad_norm': 1.0096950251075136, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.27}
|
| 687 |
+
0: 44%|████▍ | 313/711 [30:14<35:07, 5.30s/it]
|
| 688 |
44%|████▍ | 314/711 [30:20<35:18, 5.34s/it]
|
| 689 |
44%|████▍ | 315/711 [30:25<35:09, 5.33s/it]
|
| 690 |
44%|████▍ | 316/711 [30:30<34:42, 5.27s/it]
|
| 691 |
45%|████▍ | 317/711 [30:35<34:17, 5.22s/it]
|
| 692 |
45%|████▍ | 318/711 [30:40<34:02, 5.20s/it]
|
| 693 |
45%|████▍ | 319/711 [30:46<33:48, 5.18s/it]
|
| 694 |
45%|████▌ | 320/711 [30:51<33:36, 5.16s/it]
|
| 695 |
|
| 696 |
45%|████▌ | 320/711 [30:51<33:36, 5.16s/it]
|
| 697 |
45%|████▌ | 321/711 [30:56<33:30, 5.16s/it]
|
| 698 |
45%|████▌ | 322/711 [31:01<33:19, 5.14s/it]
|
| 699 |
45%|████▌ | 323/711 [31:06<33:37, 5.20s/it]
|
| 700 |
46%|████▌ | 324/711 [31:11<33:25, 5.18s/it]
|
| 701 |
46%|████▌ | 325/711 [31:17<33:17, 5.17s/it]
|
| 702 |
46%|████▌ | 326/711 [31:22<33:09, 5.17s/it]
|
| 703 |
46%|████▌ | 327/711 [31:27<33:08, 5.18s/it]
|
| 704 |
46%|███
|
| 705 |
+
0: {'loss': 0.4025, 'grad_norm': 0.9227721040091849, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.28}
|
| 706 |
+
0: {'loss': 0.3987, 'grad_norm': 1.8038936518267323, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.29}
|
| 707 |
+
0: █▌ | 328/711 [31:32<32:55, 5.16s/it]
|
| 708 |
46%|████▋ | 329/711 [31:37<32:44, 5.14s/it]
|
| 709 |
46%|████▋ | 330/711 [31:42<32:58, 5.19s/it]
|
| 710 |
|
| 711 |
46%|████▋ | 330/711 [31:42<32:58, 5.19s/it]
|
| 712 |
47%|████▋ | 331/711 [31:48<32:46, 5.18s/it]
|
| 713 |
47%|████▋ | 332/711 [31:53<32:35, 5.16s/it]
|
| 714 |
47%|████▋ | 333/711 [31:58<33:05, 5.25s/it]
|
| 715 |
47%|████▋ | 334/711 [32:03<32:47, 5.22s/it]
|
| 716 |
47%|████▋ | 335/711 [32:09<33:10, 5.29s/it]
|
| 717 |
47%|████▋ | 336/711 [32:14<32:41, 5.23s/it]
|
| 718 |
47%|████▋ | 337/711 [32:19<32:21, 5.19s/it]
|
| 719 |
48%|████▊ | 338/711 [32:24<32:19, 5.20s/it]
|
| 720 |
48%|████▊ | 339/711 [32:29<32:05, 5.18s/it]
|
| 721 |
48%|████▊ | 340/711 [32:34<31:51, 5.15s/it]
|
| 722 |
|
| 723 |
48%|████▊ | 340/711 [32:34<31:51, 5.15s/it]
|
| 724 |
48%|████▊ | 341/711 [
|
| 725 |
+
0: {'loss': 0.4004, 'grad_norm': 0.8530478906547682, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.3}
|
| 726 |
+
0: 32:40<31:54, 5.17s/it]
|
| 727 |
48%|████▊ | 342/711 [32:45<31:45, 5.16s/it]
|
| 728 |
48%|████▊ | 343/711 [32:50<31:37, 5.16s/it]
|
| 729 |
48%|████▊ | 344/711 [32:55<31:44, 5.19s/it]
|
| 730 |
49%|████▊ | 345/711 [33:00<31:29, 5.16s/it]
|
| 731 |
49%|████▊ | 346/711 [33:05<31:18, 5.15s/it]
|
| 732 |
49%|████▉ | 347/711 [33:10<31:07, 5.13s/it]
|
| 733 |
49%|████▉ | 348/711 [33:16<30:59, 5.12s/it]
|
| 734 |
49%|████▉ | 349/711 [33:21<30:57, 5.13s/it]
|
| 735 |
49%|████▉ | 350/711 [33:26<30:52, 5.13s/it]
|
| 736 |
|
| 737 |
49%|████▉ | 350/711 [33:26<30:52, 5.13s/it]
|
| 738 |
49%|████▉ | 351/711 [33:31<30:45, 5.13s/it]
|
| 739 |
50%|████▉ | 352/711 [33:36<30:36, 5.12s/it]
|
| 740 |
50%|████▉ | 353/711 [33:41<30:32, 5.12s/it]
|
| 741 |
50%|████▉ | 354/711 [33:46<30:26, 5.12s/it]
|
| 742 |
50%|████▉ | 355/711 [33:51<30:20, 5.11s/it]
|
| 743 |
50%|█████ | 356/711 [33:57<30:13,
|
| 744 |
+
0: {'loss': 0.4055, 'grad_norm': 0.8072887895552343, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.3}
|
| 745 |
+
0: 5.11s/it]
|
| 746 |
50%|█████ | 357/711 [34:02<30:10, 5.11s/it]
|
| 747 |
50%|█████ | 358/711 [34:07<30:07, 5.12s/it]
|
| 748 |
50%|█████ | 359/711 [34:12<30:03, 5.12s/it]
|
| 749 |
51%|█████ | 360/711 [34:17<29:57, 5.12s/it]
|
| 750 |
|
| 751 |
51%|█████ | 360/711 [34:17<29:57, 5.12s/it]
|
| 752 |
51%|█████ | 361/711 [34:22<29:53, 5.12s/it]
|
| 753 |
51%|█████ | 362/711 [34:27<29:46, 5.12s/it]
|
| 754 |
51%|█████ | 363/711 [34:32<29:41, 5.12s/it]
|
| 755 |
51%|█████ | 364/711 [34:38<29:39, 5.13s/it]
|
| 756 |
51%|█████▏ | 365/711 [34:43<29:59, 5.20s/it]
|
| 757 |
51%|█████▏ | 366/711 [34:48<29:44, 5.17s/it]
|
| 758 |
52%|█████▏ | 367/711 [34:53<29:30, 5.15s/it]
|
| 759 |
52%|█████▏ | 368/711 [34:58<29:27, 5.15s/it]
|
| 760 |
52%|█████▏ | 369/711 [35:03<29:14, 5.13s/it]
|
| 761 |
52%|█████▏ | 370/711 [35:09<29:11, 5.14s/it]
|
| 762 |
|
| 763 |
+
0: {'loss': 0.4024, 'grad_norm': 0.8486839849343547, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.31}
|
| 764 |
+
0: {'loss': 0.4021, 'grad_norm': 0.8529581759108179, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.32}
|
| 765 |
+
0:
|
| 766 |
52%|█████▏ | 370/711 [35:09<29:11, 5.14s/it]
|
| 767 |
52%|█████▏ | 371/711 [35:14<29:01, 5.12s/it]
|
| 768 |
52%|█████▏ | 372/711 [35:19<28:54, 5.12s/it]
|
| 769 |
52%|█████▏ | 373/711 [35:24<29:01, 5.15s/it]
|
| 770 |
53%|█████▎ | 374/711 [35:29<28:53, 5.14s/it]
|
| 771 |
53%|█████▎ | 375/711 [35:34<28:43, 5.13s/it]
|
| 772 |
53%|█████▎ | 376/711 [35:39<28:33, 5.11s/it]
|
| 773 |
53%|█████▎ | 377/711 [35:44<28:25, 5.11s/it]
|
| 774 |
53%|█████▎ | 378/711 [35:49<28:21, 5.11s/it]
|
| 775 |
53%|█████▎ | 379/711 [35:55<28:52, 5.22s/it]
|
| 776 |
53%|█████▎ | 380/711 [36:00<28:33, 5.18s/it]
|
| 777 |
|
| 778 |
53%|█████▎ | 380/711 [36:00<28:33, 5.18s/it]
|
| 779 |
54%|█████▎ | 381/711 [36:05<28:53, 5.25s/it]
|
| 780 |
54%|█████▎ | 382/711 [36:11<28:32, 5.21s/it]
|
| 781 |
54%|█████▍ | 383/711 [36:16<28:38, 5.24s/it]
|
| 782 |
54%|█████▍ | 384/711 [36:2
|
| 783 |
+
0: {'loss': 0.3972, 'grad_norm': 0.8357610935717936, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.33}
|
| 784 |
+
0: 1<28:17, 5.19s/it]
|
| 785 |
54%|█████▍ | 385/711 [36:26<28:02, 5.16s/it]
|
| 786 |
54%|█████▍ | 386/711 [36:31<27:52, 5.15s/it]
|
| 787 |
54%|█████▍ | 387/711 [36:36<27:46, 5.14s/it]
|
| 788 |
55%|█████▍ | 388/711 [36:41<27:35, 5.12s/it]
|
| 789 |
55%|█████▍ | 389/711 [36:47<27:35, 5.14s/it]
|
| 790 |
55%|█████▍ | 390/711 [36:52<27:38, 5.17s/it]
|
| 791 |
|
| 792 |
55%|█████▍ | 390/711 [36:52<27:38, 5.17s/it]
|
| 793 |
55%|█████▍ | 391/711 [36:57<27:26, 5.14s/it]
|
| 794 |
55%|█████▌ | 392/711 [37:02<27:35, 5.19s/it]
|
| 795 |
55%|█████▌ | 393/711 [37:07<27:27, 5.18s/it]
|
| 796 |
55%|█████▌ | 394/711 [37:12<27:12, 5.15s/it]
|
| 797 |
56%|█████▌ | 395/711 [37:18<27:39, 5.25s/it]
|
| 798 |
56%|█████▌ | 396/711 [37:23<27:19, 5.20s/it]
|
| 799 |
56%|█████▌ | 397/711 [37:28<27:03, 5.17s/it]
|
| 800 |
56%|█████▌ | 398/711 [37:33<26:52, 5.15s/it]
|
| 801 |
56%|█████▌
|
| 802 |
+
0: {'loss': 0.3859, 'grad_norm': 0.8058568338786659, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.34}
|
| 803 |
+
0: {'loss': 0.3898, 'grad_norm': 0.7954384150397931, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.35}
|
| 804 |
+
0: | 399/711 [37:38<26:46, 5.15s/it]
|
| 805 |
56%|█████▋ | 400/711 [37:43<26:35, 5.13s/it]
|
| 806 |
|
| 807 |
56%|█████▋ | 400/711 [37:43<26:35, 5.13s/it]
|
| 808 |
56%|█████▋ | 401/711 [37:48<26:27, 5.12s/it]
|
| 809 |
57%|█████▋ | 402/711 [37:54<26:18, 5.11s/it]
|
| 810 |
57%|█████▋ | 403/711 [37:59<26:11, 5.10s/it]
|
| 811 |
57%|█████▋ | 404/711 [38:04<26:10, 5.11s/it]
|
| 812 |
57%|█████▋ | 405/711 [38:09<26:34, 5.21s/it]
|
| 813 |
57%|█████▋ | 406/711 [38:14<26:25, 5.20s/it]
|
| 814 |
57%|█████▋ | 407/711 [38:19<26:11, 5.17s/it]
|
| 815 |
57%|█████▋ | 408/711 [38:25<26:03, 5.16s/it]
|
| 816 |
58%|█████▊ | 409/711 [38:30<26:30, 5.27s/it]
|
| 817 |
58%|█████▊ | 410/711 [38:35<26:14, 5.23s/it]
|
| 818 |
|
| 819 |
58%|█████▊ | 410/711 [38:35<26:14, 5.23s/it]
|
| 820 |
58%|█████▊ | 411/711 [38:41<26:30, 5.30s/it]
|
| 821 |
58%|███�
|
| 822 |
+
0: {'loss': 0.3925, 'grad_norm': 0.8145567494437453, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.35}
|
| 823 |
+
0: ��█▊ | 412/711 [38:46<26:10, 5.25s/it]
|
| 824 |
58%|█████▊ | 413/711 [38:51<25:55, 5.22s/it]
|
| 825 |
58%|█████▊ | 414/711 [38:56<25:40, 5.19s/it]
|
| 826 |
58%|█████▊ | 415/711 [39:01<25:28, 5.16s/it]
|
| 827 |
59%|█████▊ | 416/711 [39:07<25:49, 5.25s/it]
|
| 828 |
59%|█████▊ | 417/711 [39:12<25:31, 5.21s/it]
|
| 829 |
59%|█████▉ | 418/711 [39:17<25:17, 5.18s/it]
|
| 830 |
59%|█████▉ | 419/711 [39:22<25:06, 5.16s/it]
|
| 831 |
59%|█████▉ | 420/711 [39:27<24:59, 5.15s/it]
|
| 832 |
|
| 833 |
59%|█████▉ | 420/711 [39:27<24:59, 5.15s/it]
|
| 834 |
59%|█████▉ | 421/711 [39:32<24:49, 5.14s/it]
|
| 835 |
59%|█████▉ | 422/711 [39:37<24:44, 5.14s/it]
|
| 836 |
59%|█████▉ | 423/711 [39:43<24:35, 5.12s/it]
|
| 837 |
60%|█████▉ | 424/711 [39:48<24:42, 5.17s/it]
|
| 838 |
60%|█████▉ | 425/711 [39:53<24:34, 5.16s/it]
|
| 839 |
60%|█████▉ | 426/711 [39:58<24:30, 5.16s/
|
| 840 |
+
0: {'loss': 0.3927, 'grad_norm': 0.8237856091804933, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.36}
|
| 841 |
+
0: it]
|
| 842 |
60%|██████ | 427/711 [40:03<24:40, 5.21s/it]
|
| 843 |
60%|██████ | 428/711 [40:09<24:55, 5.28s/it]
|
| 844 |
60%|██████ | 429/711 [40:14<24:33, 5.23s/it]
|
| 845 |
60%|██████ | 430/711 [40:19<24:23, 5.21s/it]
|
| 846 |
|
| 847 |
60%|██████ | 430/711 [40:19<24:23, 5.21s/it]
|
| 848 |
61%|██████ | 431/711 [40:24<24:10, 5.18s/it]
|
| 849 |
61%|██████ | 432/711 [40:29<23:56, 5.15s/it]
|
| 850 |
61%|██████ | 433/711 [40:34<23:50, 5.14s/it]
|
| 851 |
61%|██████ | 434/711 [40:40<23:39, 5.12s/it]
|
| 852 |
61%|██████ | 435/711 [40:45<23:32, 5.12s/it]
|
| 853 |
61%|██████▏ | 436/711 [40:50<24:14, 5.29s/it]
|
| 854 |
61%|██████▏ | 437/711 [40:56<24:11, 5.30s/it]
|
| 855 |
62%|██████▏ | 438/711 [41:01<23:49, 5.24s/it]
|
| 856 |
62%|██████▏ | 439/711 [41:06<23:33, 5.20s/it]
|
| 857 |
62%|██████▏ | 440/711 [41:11<23:37, 5.23s/it]
|
| 858 |
|
| 859 |
+
0: {'loss': 0.3937, 'grad_norm': 0.8553439901672909, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.37}
|
| 860 |
+
0: {'loss': 0.3873, 'grad_norm': 0.8286249080798415, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.38}
|
| 861 |
+
0:
|
| 862 |
62%|██████▏ | 440/711 [41:11<23:37, 5.23s/it]
|
| 863 |
62%|██████▏ | 441/711 [41:16<23:23, 5.20s/it]
|
| 864 |
62%|██████▏ | 442/711 [41:21<23:13, 5.18s/it]
|
| 865 |
62%|██████▏ | 443/711 [41:27<23:01, 5.16s/it]
|
| 866 |
62%|██████▏ | 444/711 [41:32<22:57, 5.16s/it]
|
| 867 |
63%|██████▎ | 445/711 [41:37<23:14, 5.24s/it]
|
| 868 |
63%|██████▎ | 446/711 [41:42<22:58, 5.20s/it]
|
| 869 |
63%|██████▎ | 447/711 [41:47<22:45, 5.17s/it]
|
| 870 |
63%|██████▎ | 448/711 [41:52<22:34, 5.15s/it]
|
| 871 |
63%|██████▎ | 449/711 [41:58<22:43, 5.20s/it]
|
| 872 |
63%|██████▎ | 450/711 [42:03<22:32, 5.18s/it]
|
| 873 |
|
| 874 |
63%|██████▎ | 450/711 [42:03<22:32, 5.18s/it]
|
| 875 |
63%|██████▎ | 451/711 [42:08<22:20, 5.16s/it]
|
| 876 |
64%|██████▎ | 452/711 [42:13<22:14, 5.15s/it]
|
| 877 |
64%|██████▎ | 453/711 [42:18<22:09, 5
|
| 878 |
+
0: {'loss': 0.384, 'grad_norm': 0.8623716385758442, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.39}
|
| 879 |
+
0: .15s/it]
|
| 880 |
64%|██████▍ | 454/711 [42:23<22:03, 5.15s/it]
|
| 881 |
64%|██████▍ | 455/711 [42:29<21:56, 5.14s/it]
|
| 882 |
64%|██████▍ | 456/711 [42:34<21:51, 5.14s/it]
|
| 883 |
64%|██████▍ | 457/711 [42:39<21:44, 5.14s/it]
|
| 884 |
64%|██████▍ | 458/711 [42:44<21:40, 5.14s/it]
|
| 885 |
65%|██████▍ | 459/711 [42:49<21:39, 5.16s/it]
|
| 886 |
65%|██████▍ | 460/711 [42:54<21:46, 5.21s/it]
|
| 887 |
|
| 888 |
65%|██████▍ | 460/711 [42:54<21:46, 5.21s/it]
|
| 889 |
65%|██████▍ | 461/711 [43:00<21:38, 5.19s/it]
|
| 890 |
65%|██████▍ | 462/711 [43:05<21:30, 5.18s/it]
|
| 891 |
65%|██████▌ | 463/711 [43:10<21:25, 5.18s/it]
|
| 892 |
65%|██████▌ | 464/711 [43:15<21:17, 5.17s/it]
|
| 893 |
65%|██████▌ | 465/711 [43:20<21:05, 5.15s/it]
|
| 894 |
66%|██████▌ | 466/711 [43:25<20:58, 5.14s/it]
|
| 895 |
66%|██████▌ | 467/711 [43:30<20:53, 5.14s/it]
|
| 896 |
66%
|
| 897 |
+
0: {'loss': 0.3893, 'grad_norm': 0.7980262942281969, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.4}
|
| 898 |
+
0: {'loss': 0.3928, 'grad_norm': 0.9024656134697462, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.4}
|
| 899 |
+
0: |██████▌ | 468/711 [43:36<20:50, 5.14s/it]
|
| 900 |
66%|██████▌ | 469/711 [43:41<21:10, 5.25s/it]
|
| 901 |
66%|██████▌ | 470/711 [43:46<20:53, 5.20s/it]
|
| 902 |
|
| 903 |
66%|██████▌ | 470/711 [43:46<20:53, 5.20s/it]
|
| 904 |
66%|██████▌ | 471/711 [43:51<20:45, 5.19s/it]
|
| 905 |
66%|██████▋ | 472/711 [43:57<20:40, 5.19s/it]
|
| 906 |
67%|██████▋ | 473/711 [44:02<20:28, 5.16s/it]
|
| 907 |
67%|██████▋ | 474/711 [44:07<20:23, 5.16s/it]
|
| 908 |
67%|██████▋ | 475/711 [44:12<20:12, 5.14s/it]
|
| 909 |
67%|██████▋ | 476/711 [44:18<20:43, 5.29s/it]
|
| 910 |
67%|██████▋ | 477/711 [44:23<20:23, 5.23s/it]
|
| 911 |
67%|██████▋ | 478/711 [44:28<20:24, 5.26s/it]
|
| 912 |
67%|██████▋ | 479/711 [44:33<20:10, 5.22s/it]
|
| 913 |
68%|██████▊ | 480/711 [44:38<19:59, 5.19s/it]
|
| 914 |
|
| 915 |
68%|██████▊ |
|
| 916 |
+
0: {'loss': 0.3747, 'grad_norm': 1.4532167164219425, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.41}
|
| 917 |
+
0: 480/711 [44:38<19:59, 5.19s/it]
|
| 918 |
68%|██████▊ | 481/711 [44:44<20:13, 5.28s/it]
|
| 919 |
68%|██████▊ | 482/711 [44:49<20:00, 5.24s/it]
|
| 920 |
68%|██████▊ | 483/711 [44:54<19:48, 5.21s/it]
|
| 921 |
68%|██████▊ | 484/711 [44:59<19:39, 5.20s/it]
|
| 922 |
68%|██████▊ | 485/711 [45:04<19:33, 5.19s/it]
|
| 923 |
68%|██████▊ | 486/711 [45:10<19:26, 5.18s/it]
|
| 924 |
68%|██████▊ | 487/711 [45:15<19:19, 5.18s/it]
|
| 925 |
69%|██████▊ | 488/711 [45:20<19:09, 5.15s/it]
|
| 926 |
69%|██████▉ | 489/711 [45:25<19:24, 5.25s/it]
|
| 927 |
69%|██████▉ | 490/711 [45:30<19:09, 5.20s/it]
|
| 928 |
|
| 929 |
69%|██████▉ | 490/711 [45:30<19:09, 5.20s/it]
|
| 930 |
69%|██████▉ | 491/711 [45:36<19:16, 5.26s/it]
|
| 931 |
69%|██████▉ | 492/711 [45:41<19:00, 5.21s/it]
|
| 932 |
69%|██████▉ | 493/711 [45:46<18:47, 5.17s/it]
|
| 933 |
69%|██████▉ | 494/711 [45:
|
| 934 |
+
0: {'loss': 0.3797, 'grad_norm': 0.8355553409451639, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.42}
|
| 935 |
+
0: 51<19:02, 5.27s/it]
|
| 936 |
70%|██████▉ | 495/711 [45:56<18:45, 5.21s/it]
|
| 937 |
70%|██████▉ | 496/711 [46:02<18:34, 5.19s/it]
|
| 938 |
70%|██████▉ | 497/711 [46:07<18:38, 5.22s/it]
|
| 939 |
70%|███████ | 498/711 [46:12<18:25, 5.19s/it]
|
| 940 |
70%|███████ | 499/711 [46:18<18:55, 5.35s/it]
|
| 941 |
70%|███████ | 500/711 [46:23<18:34, 5.28s/it]
|
| 942 |
|
| 943 |
70%|███████ | 500/711 [46:23<18:34, 5.28s/it]
|
| 944 |
70%|███████ | 501/711 [46:28<18:22, 5.25s/it]
|
| 945 |
71%|███████ | 502/711 [46:33<18:12, 5.23s/it]
|
| 946 |
71%|███████ | 503/711 [46:38<18:04, 5.21s/it]
|
| 947 |
71%|███████ | 504/711 [46:44<17:53, 5.19s/it]
|
| 948 |
71%|███████ | 505/711 [46:49<17:43, 5.16s/it]
|
| 949 |
71%|███████ | 506/711 [46:54<17:36, 5.16s/it]
|
| 950 |
71%|███████▏ | 507/711 [46:59<17:29, 5.14s/it]
|
| 951 |
71%|███████▏ | 508/711 [47:04<17:21,
|
| 952 |
+
0: {'loss': 0.3917, 'grad_norm': 0.8425621928447311, 'learning_rate': 2e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.43}
|
| 953 |
+
0: {'loss': 0.3825, 'grad_norm': 0.7816311224212293, 'learning_rate': 1.9929032311830302e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.44}
|
| 954 |
+
0: 5.13s/it]
|
| 955 |
72%|███████▏ | 509/711 [47:09<17:16, 5.13s/it]
|
| 956 |
72%|███████▏ | 510/711 [47:14<17:09, 5.12s/it]
|
| 957 |
|
| 958 |
72%|███████▏ | 510/711 [47:14<17:09, 5.12s/it]
|
| 959 |
72%|███████▏ | 511/711 [47:20<17:21, 5.21s/it]
|
| 960 |
72%|███████▏ | 512/711 [47:25<17:12, 5.19s/it]
|
| 961 |
72%|███████▏ | 513/711 [47:30<17:00, 5.16s/it]
|
| 962 |
72%|███████▏ | 514/711 [47:35<16:51, 5.14s/it]
|
| 963 |
72%|███████▏ | 515/711 [47:40<16:45, 5.13s/it]
|
| 964 |
73%|███████▎ | 516/711 [47:45<16:40, 5.13s/it]
|
| 965 |
73%|███████▎ | 517/711 [47:50<16:32, 5.12s/it]
|
| 966 |
73%|███████▎ | 518/711 [47:55<16:30, 5.13s/it]
|
| 967 |
73%|███████▎ | 519/711 [48:01<16:25, 5.13s/it]
|
| 968 |
73%|███████▎ | 520/711 [48:06<16:42, 5.25s/it]
|
| 969 |
|
| 970 |
73%|███████▎ | 520/711 [48:06<16:4
|
| 971 |
+
0: {'loss': 0.3819, 'grad_norm': 0.8516269037345293, 'learning_rate': 1.9642643171092486e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.45}
|
| 972 |
+
0: 2, 5.25s/it]
|
| 973 |
73%|███████▎ | 521/711 [48:11<16:30, 5.21s/it]
|
| 974 |
73%|███████▎ | 522/711 [48:16<16:18, 5.18s/it]
|
| 975 |
74%|███████▎ | 523/711 [48:22<16:20, 5.22s/it]
|
| 976 |
74%|███████▎ | 524/711 [48:27<16:36, 5.33s/it]
|
| 977 |
74%|███████▍ | 525/711 [48:32<16:20, 5.27s/it]
|
| 978 |
74%|███████▍ | 526/711 [48:37<16:07, 5.23s/it]
|
| 979 |
74%|███████▍ | 527/711 [48:43<15:53, 5.18s/it]
|
| 980 |
74%|███████▍ | 528/711 [48:48<15:54, 5.22s/it]
|
| 981 |
74%|███████▍ | 529/711 [48:53<15:42, 5.18s/it]
|
| 982 |
75%|███████▍ | 530/711 [48:58<15:38, 5.18s/it]
|
| 983 |
|
| 984 |
75%|███████▍ | 530/711 [48:58<15:38, 5.18s/it]
|
| 985 |
75%|███████▍ | 531/711 [49:03<15:27, 5.15s/it]
|
| 986 |
75%|███████▍ | 532/711 [49:08<15:19, 5.14s/it]
|
| 987 |
75%|███████▍ | 533/711 [49:13<15:11, 5.12s/it]
|
| 988 |
75%|███████▌ | 53
|
| 989 |
+
0: {'loss': 0.3918, 'grad_norm': 0.9451621145813273, 'learning_rate': 1.9143443472194176e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.46}
|
| 990 |
+
0: 4/711 [49:18<15:05, 5.12s/it]
|
| 991 |
75%|███████▌ | 535/711 [49:24<14:58, 5.11s/it]
|
| 992 |
75%|███████▌ | 536/711 [49:29<15:04, 5.17s/it]
|
| 993 |
76%|███████▌ | 537/711 [49:34<14:59, 5.17s/it]
|
| 994 |
76%|███████▌ | 538/711 [49:39<14:48, 5.14s/it]
|
| 995 |
76%|███████▌ | 539/711 [49:44<14:46, 5.16s/it]
|
| 996 |
76%|███████▌ | 540/711 [49:49<14:41, 5.15s/it]
|
| 997 |
|
| 998 |
76%|███████▌ | 540/711 [49:49<14:41, 5.15s/it]
|
| 999 |
76%|███████▌ | 541/711 [49:55<14:33, 5.14s/it]
|
| 1000 |
76%|███████▌ | 542/711 [50:00<14:26, 5.13s/it]
|
| 1001 |
76%|███████▋ | 543/711 [50:05<14:23, 5.14s/it]
|
| 1002 |
77%|███████▋ | 544/711 [50:10<14:17, 5.13s/it]
|
| 1003 |
77%|███████▋ | 545/711 [50:15<14:10, 5.12s/it]
|
| 1004 |
77%|███████▋ | 546/711 [50:20<14:04, 5.12s/it]
|
| 1005 |
77%|███████▋ | 547/711 [50:25<13:59, 5.12s/it]
|
| 1006 |
77%|████�
|
| 1007 |
+
0: {'loss': 0.3907, 'grad_norm': 0.8481507125427856, 'learning_rate': 1.8443725168471053e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.46}
|
| 1008 |
+
0: {'loss': 0.3803, 'grad_norm': 0.8683953024369212, 'learning_rate': 1.7560717646792703e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.47}
|
| 1009 |
+
0: ��██▋ | 548/711 [50:30<13:52, 5.11s/it]
|
| 1010 |
77%|███████▋ | 549/711 [50:35<13:47, 5.11s/it]
|
| 1011 |
77%|███████▋ | 550/711 [50:41<13:42, 5.11s/it]
|
| 1012 |
|
| 1013 |
77%|███████▋ | 550/711 [50:41<13:42, 5.11s/it]
|
| 1014 |
77%|███████▋ | 551/711 [50:46<13:40, 5.13s/it]
|
| 1015 |
78%|███████▊ | 552/711 [50:51<13:36, 5.13s/it]
|
| 1016 |
78%|███████▊ | 553/711 [50:56<13:29, 5.12s/it]
|
| 1017 |
78%|███████▊ | 554/711 [51:01<13:24, 5.12s/it]
|
| 1018 |
78%|███████▊ | 555/711 [51:06<13:21, 5.14s/it]
|
| 1019 |
78%|███████▊ | 556/711 [51:11<13:14, 5.12s/it]
|
| 1020 |
78%|███████▊ | 557/711 [51:17<13:25, 5.23s/it]
|
| 1021 |
78%|███████▊ | 558/711 [51:22<13:32, 5.31s/it]
|
| 1022 |
79%|███████▊ | 559/711 [51:27<13:18, 5.25s/it]
|
| 1023 |
79%|███████▉ | 560/711 [51:33<13:06, 5.21s/it]
|
| 1024 |
|
| 1025 |
79%|███�
|
| 1026 |
+
0: {'loss': 0.3904, 'grad_norm': 0.8636181194234771, 'learning_rate': 1.6516163482876789e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.48}
|
| 1027 |
+
0: �███▉ | 560/711 [51:33<13:06, 5.21s/it]
|
| 1028 |
79%|███████▉ | 561/711 [51:38<12:58, 5.19s/it]
|
| 1029 |
79%|███████▉ | 562/711 [51:43<13:13, 5.33s/it]
|
| 1030 |
79%|███████▉ | 563/711 [51:49<13:11, 5.35s/it]
|
| 1031 |
79%|███████▉ | 564/711 [51:54<12:58, 5.29s/it]
|
| 1032 |
79%|███████▉ | 565/711 [51:59<12:47, 5.26s/it]
|
| 1033 |
80%|███████▉ | 566/711 [52:04<12:34, 5.21s/it]
|
| 1034 |
80%|███████▉ | 567/711 [52:10<12:40, 5.28s/it]
|
| 1035 |
80%|███████▉ | 568/711 [52:15<12:28, 5.23s/it]
|
| 1036 |
80%|████████ | 569/711 [52:20<12:27, 5.26s/it]
|
| 1037 |
80%|████████ | 570/711 [52:25<12:14, 5.21s/it]
|
| 1038 |
|
| 1039 |
80%|████████ | 570/711 [52:25<12:14, 5.21s/it]
|
| 1040 |
80%|████████ | 571/711 [52:31<12:14, 5.24s/it]
|
| 1041 |
80%|████████ | 572/711 [52:36<12:04, 5.21s/it]
|
| 1042 |
81%|████████ | 573/711 [52:41<12:02, 5.24s/it]
|
| 1043 |
+
0: {'loss': 0.3828, 'grad_norm': 0.8879170447340103, 'learning_rate': 1.5335783066915436e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 75.77, 'epoch': 0.49}
|
| 1044 |
+
0:
|
| 1045 |
81%|████████ | 574/711 [52:46<11:53, 5.21s/it]
|
| 1046 |
81%|████████ | 575/711 [52:51<11:44, 5.18s/it]
|
| 1047 |
81%|████████ | 576/711 [52:56<11:38, 5.17s/it]
|
| 1048 |
81%|████████ | 577/711 [53:02<11:46, 5.27s/it]
|
| 1049 |
81%|████████▏ | 578/711 [53:07<11:49, 5.33s/it]
|
| 1050 |
81%|████████▏ | 579/711 [53:12<11:34, 5.26s/it]
|
| 1051 |
82%|████████▏ | 580/711 [53:18<11:25, 5.23s/it]
|
| 1052 |
|
| 1053 |
82%|████████▏ | 580/711 [53:18<11:25, 5.23s/it]
|
| 1054 |
82%|████████▏ | 581/711 [53:23<11:15, 5.19s/it]
|
| 1055 |
82%|████████▏ | 582/711 [53:28<11:06, 5.16s/it]
|
| 1056 |
82%|████████▏ | 583/711 [53:33<10:59, 5.15s/it]
|
| 1057 |
82%|████████▏ | 584/711 [53:38<10:52, 5.14s/it]
|
| 1058 |
82%|████████▏ | 585/711 [53:43<10:46, 5.13s/it]
|
| 1059 |
82%|████████▏ | 586/711 [53:48<10:42, 5.14s/it]
|
| 1060 |
83%|███████�
|
| 1061 |
+
0: {'loss': 0.3763, 'grad_norm': 0.829865464468096, 'learning_rate': 1.4048641282207622e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.5}
|
| 1062 |
+
0: �▎ | 587/711 [53:54<11:03, 5.35s/it]
|
| 1063 |
83%|████████▎ | 588/711 [53:59<10:55, 5.33s/it]
|
| 1064 |
83%|████████▎ | 589/711 [54:05<10:40, 5.25s/it]
|
| 1065 |
83%|████████▎ | 590/711 [54:10<10:43, 5.32s/it]
|
| 1066 |
|
| 1067 |
83%|████████▎ | 590/711 [54:10<10:43, 5.32s/it]
|
| 1068 |
83%|████████▎ | 591/711 [54:15<10:32, 5.27s/it]
|
| 1069 |
83%|████████▎ | 592/711 [54:20<10:23, 5.24s/it]
|
| 1070 |
83%|████████▎ | 593/711 [54:25<10:14, 5.21s/it]
|
| 1071 |
84%|████████▎ | 594/711 [54:31<10:07, 5.19s/it]
|
| 1072 |
84%|████████▎ | 595/711 [54:36<09:59, 5.17s/it]
|
| 1073 |
84%|████████▍ | 596/711 [54:41<09:51, 5.14s/it]
|
| 1074 |
84%|████████▍ | 597/711 [54:46<09:53, 5.21s/it]
|
| 1075 |
84%|████████▍ | 598/711 [54:51<09:45, 5.18s/it]
|
| 1076 |
84%|████████▍ | 599/711 [54:57<10:07, 5.42s/it]
|
| 1077 |
84%|████████▍ | 600/711 [55:02
|
| 1078 |
+
0: {'loss': 0.3895, 'grad_norm': 1.0461353857982227, 'learning_rate': 1.2686431831271522e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.51}
|
| 1079 |
+
0: {'loss': 0.3726, 'grad_norm': 0.8291006806562558, 'learning_rate': 1.1282696831703153e-06, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.51}
|
| 1080 |
+
0: <09:53, 5.35s/it]
|
| 1081 |
|
| 1082 |
84%|████████▍ | 600/711 [55:02<09:53, 5.35s/it]
|
| 1083 |
85%|████████▍ | 601/711 [55:08<09:48, 5.35s/it]
|
| 1084 |
85%|████████▍ | 602/711 [55:13<09:35, 5.28s/it]
|
| 1085 |
85%|████████▍ | 603/711 [55:18<09:24, 5.23s/it]
|
| 1086 |
85%|████████▍ | 604/711 [55:23<09:22, 5.26s/it]
|
| 1087 |
85%|████████▌ | 605/711 [55:28<09:12, 5.22s/it]
|
| 1088 |
85%|████████▌ | 606/711 [55:34<09:27, 5.41s/it]
|
| 1089 |
85%|████████▌ | 607/711 [55:40<09:29, 5.47s/it]
|
| 1090 |
86%|████████▌ | 608/711 [55:45<09:17, 5.41s/it]
|
| 1091 |
86%|████████▌ | 609/711 [55:50<09:03, 5.32s/it]
|
| 1092 |
86%|████████▌ | 610/711 [55:56<08:56, 5.31s/it]
|
| 1093 |
|
| 1094 |
86%|████████▌ | 610/711 [55:56<08:56, 5.31s/it]
|
| 1095 |
86%|████████▌ | 611/711 [56:01<08:45, 5.25s/it]
|
| 1096 |
86%|████�
|
| 1097 |
+
0: {'loss': 0.3749, 'grad_norm': 0.8273940149239204, 'learning_rate': 9.87200089792126e-07, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.52}
|
| 1098 |
+
0: �███▌ | 612/711 [56:06<08:36, 5.22s/it]
|
| 1099 |
86%|████████▌ | 613/711 [56:11<08:34, 5.24s/it]
|
| 1100 |
86%|████████▋ | 614/711 [56:16<08:25, 5.22s/it]
|
| 1101 |
86%|████████▋ | 615/711 [56:21<08:17, 5.19s/it]
|
| 1102 |
87%|████████▋ | 616/711 [56:28<08:41, 5.48s/it]
|
| 1103 |
87%|████████▋ | 617/711 [56:33<08:24, 5.37s/it]
|
| 1104 |
87%|████████▋ | 618/711 [56:38<08:23, 5.41s/it]
|
| 1105 |
87%|████████▋ | 619/711 [56:43<08:09, 5.33s/it]
|
| 1106 |
87%|████████▋ | 620/711 [56:49<07:59, 5.27s/it]
|
| 1107 |
|
| 1108 |
87%|████████▋ | 620/711 [56:49<07:59, 5.27s/it]
|
| 1109 |
87%|████████▋ | 621/711 [56:54<07:51, 5.24s/it]
|
| 1110 |
87%|████████▋ | 622/711 [56:59<07:43, 5.21s/it]
|
| 1111 |
88%|████████▊ | 623/711 [57:04<07:44, 5.28s/it]
|
| 1112 |
88%|████████▊ | 624/711 [57:09<07:35, 5.23s/it]
|
| 1113 |
88%|████████▊ | 625/7
|
| 1114 |
+
0: {'loss': 0.3717, 'grad_norm': 0.7609373973543032, 'learning_rate': 8.489080045646937e-07, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.53}
|
| 1115 |
+
0: 11 [57:14<07:26, 5.19s/it]
|
| 1116 |
88%|████████▊ | 626/711 [57:20<07:19, 5.17s/it]
|
| 1117 |
88%|████████▊ | 627/711 [57:25<07:12, 5.14s/it]
|
| 1118 |
88%|████████▊ | 628/711 [57:30<07:07, 5.15s/it]
|
| 1119 |
88%|████████▊ | 629/711 [57:35<07:03, 5.16s/it]
|
| 1120 |
89%|████████▊ | 630/711 [57:41<07:25, 5.49s/it]
|
| 1121 |
|
| 1122 |
89%|████████▊ | 630/711 [57:41<07:25, 5.49s/it]
|
| 1123 |
89%|████████▊ | 631/711 [57:47<07:13, 5.42s/it]
|
| 1124 |
89%|████████▉ | 632/711 [57:52<07:01, 5.33s/it]
|
| 1125 |
89%|████████▉ | 633/711 [57:57<06:51, 5.28s/it]
|
| 1126 |
89%|████████▉ | 634/711 [58:02<06:42, 5.23s/it]
|
| 1127 |
89%|████████▉ | 635/711 [58:07<06:35, 5.21s/it]
|
| 1128 |
89%|████████▉ | 636/711 [58:13<06:36, 5.28s/it]
|
| 1129 |
90%|████████▉ | 637/711 [58:18<06:26, 5.23s/it]
|
| 1130 |
90%|████████▉ | 638/711 [58:23<06:19, 5.2
|
| 1131 |
+
0: {'loss': 0.3746, 'grad_norm': 0.8664669621801867, 'learning_rate': 7.167986375914345e-07, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.54}
|
| 1132 |
+
0: {'loss': 0.3774, 'grad_norm': 0.7890688699500655, 'learning_rate': 5.941249599330827e-07, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.55}
|
| 1133 |
+
0: 0s/it]
|
| 1134 |
90%|████████▉ | 639/711 [58:28<06:19, 5.28s/it]
|
| 1135 |
90%|█████████ | 640/711 [58:33<06:11, 5.24s/it]
|
| 1136 |
|
| 1137 |
90%|█████████ | 640/711 [58:33<06:11, 5.24s/it]
|
| 1138 |
90%|█████████ | 641/711 [58:39<06:10, 5.30s/it]
|
| 1139 |
90%|█████████ | 642/711 [58:44<06:02, 5.25s/it]
|
| 1140 |
90%|█████████ | 643/711 [58:49<05:53, 5.20s/it]
|
| 1141 |
91%|█████████ | 644/711 [58:54<05:46, 5.17s/it]
|
| 1142 |
91%|█████████ | 645/711 [59:00<05:46, 5.26s/it]
|
| 1143 |
91%|█████████ | 646/711 [59:05<05:38, 5.21s/it]
|
| 1144 |
91%|█████████ | 647/711 [59:10<05:31, 5.18s/it]
|
| 1145 |
91%|█████████ | 648/711 [59:15<05:25, 5.17s/it]
|
| 1146 |
91%|█████████▏| 649/711 [59:20<05:18, 5.14s/it]
|
| 1147 |
91%|█████████▏| 650/711 [59:25<05:18, 5.23s/it]
|
| 1148 |
|
| 1149 |
91%|███████�
|
| 1150 |
+
0: {'loss': 0.3821, 'grad_norm': 0.8161369753902079, 'learning_rate': 4.839076046641801e-07, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.56}
|
| 1151 |
+
0: ��█▏| 650/711 [59:25<05:18, 5.23s/it]
|
| 1152 |
92%|█████████▏| 651/711 [59:31<05:11, 5.20s/it]
|
| 1153 |
92%|█████████▏| 652/711 [59:36<05:04, 5.16s/it]
|
| 1154 |
92%|█████████▏| 653/711 [59:41<05:07, 5.31s/it]
|
| 1155 |
92%|█████████▏| 654/711 [59:46<04:58, 5.24s/it]
|
| 1156 |
92%|█████████▏| 655/711 [59:52<04:52, 5.22s/it]
|
| 1157 |
92%|█████████▏| 656/711 [59:57<04:51, 5.30s/it]
|
| 1158 |
92%|█████████▏| 657/711 [1:00:02<04:43, 5.26s/it]
|
| 1159 |
93%|█████████▎| 658/711 [1:00:07<04:36, 5.22s/it]
|
| 1160 |
93%|█████████▎| 659/711 [1:00:13<04:32, 5.23s/it]
|
| 1161 |
93%|█████████▎| 660/711 [1:00:18<04:24, 5.19s/it]
|
| 1162 |
|
| 1163 |
93%|█████████▎| 660/711 [1:00:18<04:24, 5.19s/it]
|
| 1164 |
93%|█████████▎| 661/711 [1:00:23<04:23, 5.28s/it]
|
| 1165 |
93%|█████████▎| 662/711 [1:00:28<04:16, 5.23s/it]
|
| 1166 |
93%
|
| 1167 |
+
0: {'loss': 0.3655, 'grad_norm': 0.8247018856496126, 'learning_rate': 3.888604888618786e-07, 'memory/max_mem_active(gib)': 69.44, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.56}
|
| 1168 |
+
0: |█████████▎| 663/711 [1:00:33<04:09, 5.20s/it]
|
| 1169 |
93%|█████████▎| 664/711 [1:00:39<04:02, 5.17s/it]
|
| 1170 |
94%|█████████▎| 665/711 [1:00:44<03:57, 5.16s/it]
|
| 1171 |
94%|█████████▎| 666/711 [1:00:49<03:54, 5.21s/it]
|
| 1172 |
94%|█████████▍| 667/711 [1:00:54<03:48, 5.19s/it]
|
| 1173 |
94%|█████████▍| 668/711 [1:00:59<03:41, 5.16s/it]
|
| 1174 |
94%|█████████▍| 669/711 [1:01:04<03:35, 5.14s/it]
|
| 1175 |
94%|█████████▍| 670/711 [1:01:10<03:33, 5.22s/it]
|
| 1176 |
|
| 1177 |
94%|█████████▍| 670/711 [1:01:10<03:33, 5.22s/it]
|
| 1178 |
94%|█████████▍| 671/711 [1:01:15<03:27, 5.18s/it]
|
| 1179 |
95%|█████████▍| 672/711 [1:01:20<03:23, 5.22s/it]
|
| 1180 |
95%|█████████▍| 673/711 [1:01:25<03:17, 5.20s/it]
|
| 1181 |
95%|█████████▍| 674/711 [1:01:30<03:10, 5.16s/it]
|
| 1182 |
95%|█████████▍| 67
|
| 1183 |
+
0: {'loss': 0.3832, 'grad_norm': 0.7788264476641573, 'learning_rate': 3.1132398796052294e-07, 'memory/max_mem_active(gib)': 69.55, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.57}
|
| 1184 |
+
0: 5/711 [1:01:36<03:06, 5.19s/it]
|
| 1185 |
95%|█████████▌| 676/711 [1:01:42<03:13, 5.53s/it]
|
| 1186 |
95%|█████████▌| 677/711 [1:01:48<03:11, 5.63s/it]
|
| 1187 |
95%|█████████▌| 678/711 [1:01:53<03:02, 5.52s/it]
|
| 1188 |
95%|█████████▌| 679/711 [1:01:58<02:52, 5.38s/it]
|
| 1189 |
96%|█████████▌| 680/711 [1:02:03<02:44, 5.29s/it]
|
| 1190 |
|
| 1191 |
96%|█████████▌| 680/711 [1:02:03<02:44, 5.29s/it]
|
| 1192 |
96%|█████████▌| 681/711 [1:02:08<02:37, 5.25s/it]
|
| 1193 |
96%|█████████▌| 682/711 [1:02:14<02:33, 5.31s/it]
|
| 1194 |
96%|█████████▌| 683/711 [1:02:19<02:27, 5.25s/it]
|
| 1195 |
96%|█████████▌| 684/711 [1:02:24<02:20, 5.21s/it]
|
| 1196 |
96%|█████████▋| 685/711 [1:02:29<02:14, 5.18s/it]
|
| 1197 |
96%|█████████▋| 686/711 [1:02:34<02:09, 5.17s/it]
|
| 1198 |
97%|█████████▋| 687/711 [1:02:39<02:03, 5.17s/it]
|
| 1199 |
9
|
| 1200 |
+
0: {'loss': 0.3818, 'grad_norm': 0.7875994058840892, 'learning_rate': 2.532073079411971e-07, 'memory/max_mem_active(gib)': 69.55, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.58}
|
| 1201 |
+
0: 7%|█████████▋| 688/711 [1:02:45<01:58, 5.15s/it]
|
| 1202 |
97%|█████████▋| 689/711 [1:02:50<01:53, 5.15s/it]
|
| 1203 |
97%|█████████▋| 690/711 [1:02:55<01:47, 5.14s/it]
|
| 1204 |
|
| 1205 |
97%|█████████▋| 690/711 [1:02:55<01:47, 5.14s/it]
|
| 1206 |
97%|█████████▋| 691/711 [1:03:00<01:43, 5.19s/it]
|
| 1207 |
97%|█████████▋| 692/711 [1:03:05<01:37, 5.16s/it]
|
| 1208 |
97%|█████████▋| 693/711 [1:03:10<01:32, 5.14s/it]
|
| 1209 |
98%|█████████▊| 694/711 [1:03:15<01:27, 5.13s/it]
|
| 1210 |
98%|█████████▊| 695/711 [1:03:21<01:22, 5.13s/it]
|
| 1211 |
98%|█████████▊| 696/711 [1:03:26<01:17, 5.14s/it]
|
| 1212 |
98%|█████████▊| 697/711 [1:03:31<01:11, 5.14s/it]
|
| 1213 |
98%|█████████▊| 698/711 [1:03:36<01:06, 5.14s/it]
|
| 1214 |
98%|█████████▊| 699/711 [1:03:41<01:02, 5.17s/it]
|
| 1215 |
98%|█████████▊|
|
| 1216 |
+
0: {'loss': 0.3763, 'grad_norm': 0.7322258611860605, 'learning_rate': 2.1594147434418026e-07, 'memory/max_mem_active(gib)': 69.55, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.59}
|
| 1217 |
+
0: {'loss': 0.3734, 'grad_norm': 0.7653492019720352, 'learning_rate': 2.0044409567084156e-07, 'memory/max_mem_active(gib)': 69.55, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.6}
|
| 1218 |
+
0: 700/711 [1:03:47<00:57, 5.24s/it]
|
| 1219 |
|
| 1220 |
98%|█████████▊| 700/711 [1:03:47<00:57, 5.24s/it]
|
| 1221 |
99%|█████████▊| 701/711 [1:03:52<00:52, 5.26s/it]
|
| 1222 |
99%|█████████▊| 702/711 [1:03:57<00:47, 5.23s/it]
|
| 1223 |
99%|█████████▉| 703/711 [1:04:02<00:41, 5.20s/it]
|
| 1224 |
99%|█████████▉| 704/711 [1:04:07<00:36, 5.20s/it]
|
| 1225 |
99%|█████████▉| 705/711 [1:04:13<00:31, 5.30s/it]
|
| 1226 |
99%|█████████▉| 706/711 [1:04:18<00:26, 5.24s/it]
|
| 1227 |
99%|█████████▉| 707/711 [1:04:23<00:21, 5.27s/it]
|
| 1228 |
|
| 1229 |
+
0: [2025-11-24 01:16:53,191] [INFO] [axolotl.core.trainers.base._save:613] [PID:3081979] [RANK:0] Saving model checkpoint to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0/checkpoint-711[39m
|
| 1230 |
+
0: [2025-11-24 01:17:11,725] [INFO] [axolotl.core.trainers.base._save:662] [PID:3081979] [RANK:0] Saving Trainer.data_collator.tokenizer by default as Trainer.processing_class is `None`[39m
|
| 1231 |
+
0: {'train_runtime': 3910.4069, 'train_samples_per_second': 2.909, 'train_steps_per_second': 0.182, 'train_loss': 0.4125736778295493, 'memory/max_mem_active(gib)': 69.55, 'memory/max_mem_allocated(gib)': 67.66, 'memory/device_mem_reserved(gib)': 76.53, 'epoch': 0.6}
|
| 1232 |
+
0: �█████| 711/711 [1:04:44<00:00, 5.22s/it]
|
| 1233 |
|
| 1234 |
+
0: [2025-11-24 01:17:21,056] [INFO] [axolotl.train.save_trained_model:228] [PID:3081979] [RANK:0] Training completed! Saving trained model to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0.[39m
|
| 1235 |
+
0: [2025-11-24 01:17:26,694] [INFO] [axolotl.core.trainers.base._save:613] [PID:3081979] [RANK:0] Saving model checkpoint to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0[39m
|
| 1236 |
+
0: [2025-11-24 01:17:44,493] [INFO] [axolotl.core.trainers.base._save:662] [PID:3081979] [RANK:0] Saving Trainer.data_collator.tokenizer by default as Trainer.processing_class is `None`[39m
|
| 1237 |
+
0: [2025-11-24 01:17:44,817] [INFO] [axolotl.train.save_trained_model:350] [PID:3081979] [RANK:0] Model successfully saved to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-12b/0[39m
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"boi_token": "<start_of_image>",
|
| 3 |
+
"bos_token": {
|
| 4 |
+
"content": "<bos>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
"eoi_token": "<end_of_image>",
|
| 11 |
+
"eos_token": {
|
| 12 |
+
"content": "<eos>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false
|
| 17 |
+
},
|
| 18 |
+
"image_token": "<image_soft_token>",
|
| 19 |
+
"pad_token": {
|
| 20 |
+
"content": "<pad>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"unk_token": {
|
| 27 |
+
"content": "<unk>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
}
|
| 33 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
|
| 3 |
+
size 33384568
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d94a47440372bd382b8068468e71d951722420260c48543cdcd097f95f9ee7fb
|
| 3 |
+
size 10424
|