diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..43c3861ee69c575e8d96e4c4cc676473746000b6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1025/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1230/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1435/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1640/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1842/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-205/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-410/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-615/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-820/tokenizer.json filter=lfs diff=lfs merge=lfs -text +final/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/.ipynb_checkpoints/plot_loss_from_trainer_state-checkpoint.py b/.ipynb_checkpoints/plot_loss_from_trainer_state-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..eaf442644d02d8625f8186aa6d050495712cd2f9 --- /dev/null +++ b/.ipynb_checkpoints/plot_loss_from_trainer_state-checkpoint.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +""" +Usage: + python plot_loss_from_trainer_state.py --input trainer_state.json --outdir ./plots \ + --checkpoint_steps 263,526,789,1052 + +功能: +- Curve: 黃橘色實線 +- Grid: x,y 虛線 +- Epoch markers: 藍色虛線 + EpochN 標籤(含最後一個 epoch) +- Checkpoints: 藍色小圓點(線性插值;超出範圍時使用端點值,並自動擴張 x 軸確保能看見) +""" +import json, argparse +from pathlib import Path +import matplotlib.pyplot as plt +import numpy as np + +YELLOW_ORANGE = "#d58f00" +BLUE = "#1f77b4" + +def find_epoch_boundaries(log_items): + """找到每個 epoch 邊界 (包含最後一個)""" + boundaries = [] + prev_epoch_int = None + seen = set() + last_step, last_epoch = None, None + for it in log_items: + step = it.get("step") + ep = it.get("epoch") + if step is None or ep is None: + continue + last_step, last_epoch = step, ep + ep_int = int(ep) + if prev_epoch_int is None: + prev_epoch_int = ep_int + continue + if ep_int != prev_epoch_int: + if (step, ep_int) not in seen and ep_int >= 1: + boundaries.append((step, ep_int)) + seen.add((step, ep_int)) + prev_epoch_int = ep_int + # 最後一個 epoch 也補上 + if last_step is not None and last_epoch is not None: + ep_final = int(float(last_epoch)) + 1 + if (last_step, ep_final) not in seen: + boundaries.append((last_step, ep_final)) + boundaries.sort(key=lambda x: x[0]) + return boundaries + +def plot_series(x, y, xlabel, ylabel, title, outpath, + epoch_marks=None, checkpoint_steps=None, + color=YELLOW_ORANGE, linestyle='-'): + fig = plt.figure(figsize=(10,6)) + ax = fig.add_subplot(111) + ax.plot(x, y, color=color, linestyle=linestyle, linewidth=2) + + # 標記 checkpoint 藍點(線性插值;邊界外使用端點值) + extra_x = [] + if checkpoint_steps: + for s in checkpoint_steps: + y_interp = np.interp(s, x, y, left=y[0], right=y[-1]) + ax.plot(s, y_interp, marker='o', color=BLUE, markersize=6) + extra_x.append(s) + + # === 計算 x 範圍時把 epoch 標線也納入,並加右側 padding === + xmin = 0 + all_x_candidates = [max(x)] + if extra_x: + all_x_candidates.append(max(extra_x)) + if epoch_marks: + # 把所有 epoch 標線的 step 納入考量 + ep_steps = [s for (s, _) in epoch_marks] + if ep_steps: + all_x_candidates.append(max(ep_steps)) + + xmax_base = max(all_x_candidates) if all_x_candidates else x[-1] + + # 右邊加一點 margin,避免剛好貼齊看不到線 + span = max(xmax_base - xmin, 1.0) + right_pad = max(1.0, 0.02 * span) # 至少 +1 step 或 2% 寬度 + ax.set_xlim(left=xmin, right=xmax_base + right_pad) + + # y 仍從 0 起 + ax.set_ylim(bottom=0) + + # 虛線格線 + ax.grid(True, which='major', axis='both', linestyle='--', linewidth=0.8, alpha=0.6) + + # epoch 標記 (藍色虛線) + if epoch_marks: + for step, ep in epoch_marks: + ax.axvline(x=step, color=BLUE, linestyle='--', linewidth=1.2) + ymax = ax.get_ylim()[1] + ax.text(step, ymax*0.98, f'Epoch{ep}', rotation=90, + va='top', ha='right', fontsize=8, color=BLUE) + + # label & look(放到最後避免被 set_xlim/set_ylim 影響) + ax.set_xlabel(xlabel); ax.set_ylabel(ylabel); ax.set_title(title) + ax.spines['left'].set_linewidth(2); ax.spines['bottom'].set_linewidth(2) + ax.spines['right'].set_visible(False); ax.spines['top'].set_visible(False) + + fig.savefig(outpath, bbox_inches="tight") + plt.close(fig) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--input", required=True, help="Path to trainer_state.json") + ap.add_argument("--outdir", default="./plots", help="Directory to save PNGs") + ap.add_argument("--no_epoch_marks", action="store_true", help="Disable vertical epoch markers") + ap.add_argument("--checkpoint_steps", default="", help="Comma-separated steps (e.g., 100,200,500)") + args = ap.parse_args() + + src = Path(args.input) + with open(src, "r", encoding="utf-8") as f: + state = json.load(f) + + log = state.get("log_history", state.get("logs", [])) + + steps, train_losses = [], [] + eval_steps, eval_losses = [], [] + lr_steps, lrs = [], [] + + for item in log: + step = item.get("step") + if step is None: + continue + if "loss" in item: + steps.append(step); train_losses.append(item["loss"]) + if "eval_loss" in item: + eval_steps.append(step); eval_losses.append(item["eval_loss"]) + if "learning_rate" in item: + lr_steps.append(step); lrs.append(item["learning_rate"]) + + outdir = Path(args.outdir); outdir.mkdir(parents=True, exist_ok=True) + + epoch_marks = None if args.no_epoch_marks else find_epoch_boundaries(log) + # 允許空白與混合格式 + raw = [s.strip() for s in args.checkpoint_steps.replace(",", ",").split(",") if s.strip()] + checkpoint_steps = [] + for s in raw: + try: + checkpoint_steps.append(int(float(s))) + except: + pass + + if steps and train_losses: + plot_series(steps, train_losses, "Step", "Training Loss", "Training Loss vs Step", + outdir / "loss_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + if eval_steps and eval_losses: + plot_series(eval_steps, eval_losses, "Step", "Eval Loss", "Eval Loss vs Step", + outdir / "eval_loss_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + if lr_steps and lrs: + plot_series(lr_steps, lrs, "Step", "Learning Rate", "Learning Rate vs Step", + outdir / "lr_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + + print(f"Saved plots to: {outdir.resolve()}") + +if __name__ == "__main__": + main() diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..850b8a7b7e98c250c049c275821793dc06f6bd0f --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfa231e6a429050ed62c5c2fd8478b22f783cde633e267292cba7c756066ef2 +size 54560368 diff --git a/checkpoint-1025/README.md b/checkpoint-1025/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1025/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1025/adapter_config.json b/checkpoint-1025/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-1025/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1025/adapter_model.safetensors b/checkpoint-1025/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e96392fc4084331d58b5aa780316650be916ed7 --- /dev/null +++ b/checkpoint-1025/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03df54f7be1d57f8d7321ba2bab53a9ad2fe6140664c53dbc83389d42bd72e78 +size 54560368 diff --git a/checkpoint-1025/optimizer.pt b/checkpoint-1025/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c682bf86af1482c7ed829bc214a441fc7557daf --- /dev/null +++ b/checkpoint-1025/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5974423bc5365cf9c21c2be95a987cca5d0e097ea52dbf544cdbc1ad08be530 +size 109267450 diff --git a/checkpoint-1025/rng_state.pth b/checkpoint-1025/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4719e7ddf3899ec45c8dabdda8ca9c70b288128c --- /dev/null +++ b/checkpoint-1025/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98bd0320fb485c5f1f6d8f7f83f8d329c43c4c60a7c994314dee571ce563ba3f +size 14244 diff --git a/checkpoint-1025/scheduler.pt b/checkpoint-1025/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba8005ce7191e41ced2348d75009260623476909 --- /dev/null +++ b/checkpoint-1025/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5408a03c212eb536c8cc52128039c5302e7da7927d72de6b3a298661a44a42 +size 1064 diff --git a/checkpoint-1025/special_tokens_map.json b/checkpoint-1025/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1025/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1025/tokenizer.json b/checkpoint-1025/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1025/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1025/tokenizer_config.json b/checkpoint-1025/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1025/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1025/trainer_state.json b/checkpoint-1025/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8d2cef3b8ba9d16091fe51adf71728423217c071 --- /dev/null +++ b/checkpoint-1025/trainer_state.json @@ -0,0 +1,787 @@ +{ + "best_metric": 0.22156645357608795, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-1025", + "epoch": 1.6686167777382104, + "eval_steps": 205, + "global_step": 1025, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + }, + { + "epoch": 1.0093096606806735, + "grad_norm": 0.5520356893539429, + "learning_rate": 1.980547431053456e-05, + "loss": 0.131, + "step": 620 + }, + { + "epoch": 1.0255888487561682, + "grad_norm": 0.6150078177452087, + "learning_rate": 1.9622115153168884e-05, + "loss": 0.1187, + "step": 630 + }, + { + "epoch": 1.041868036831663, + "grad_norm": 0.5100656151771545, + "learning_rate": 1.9436458168207117e-05, + "loss": 0.114, + "step": 640 + }, + { + "epoch": 1.0581472249071577, + "grad_norm": 0.5156052112579346, + "learning_rate": 1.9248563254672825e-05, + "loss": 0.1099, + "step": 650 + }, + { + "epoch": 1.0744264129826524, + "grad_norm": 0.4662775993347168, + "learning_rate": 1.9058491033618632e-05, + "loss": 0.1135, + "step": 660 + }, + { + "epoch": 1.0907056010581473, + "grad_norm": 0.4357255697250366, + "learning_rate": 1.886630282856787e-05, + "loss": 0.1036, + "step": 670 + }, + { + "epoch": 1.106984789133642, + "grad_norm": 0.3861764967441559, + "learning_rate": 1.867206064572962e-05, + "loss": 0.1145, + "step": 680 + }, + { + "epoch": 1.1232639772091366, + "grad_norm": 0.4562045633792877, + "learning_rate": 1.8475827153993447e-05, + "loss": 0.1107, + "step": 690 + }, + { + "epoch": 1.1395431652846315, + "grad_norm": 0.332917720079422, + "learning_rate": 1.8277665664710387e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 1.1558223533601262, + "grad_norm": 0.5971720814704895, + "learning_rate": 1.807764011126663e-05, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 1.1721015414356208, + "grad_norm": 0.6102172136306763, + "learning_rate": 1.787581502845651e-05, + "loss": 0.1046, + "step": 720 + }, + { + "epoch": 1.1883807295111157, + "grad_norm": 0.5294010043144226, + "learning_rate": 1.767225553166146e-05, + "loss": 0.1044, + "step": 730 + }, + { + "epoch": 1.2046599175866104, + "grad_norm": 0.5074148178100586, + "learning_rate": 1.7467027295841688e-05, + "loss": 0.1251, + "step": 740 + }, + { + "epoch": 1.220939105662105, + "grad_norm": 0.6349917650222778, + "learning_rate": 1.7260196534347235e-05, + "loss": 0.1037, + "step": 750 + }, + { + "epoch": 1.2372182937376, + "grad_norm": 0.34580153226852417, + "learning_rate": 1.7051829977555426e-05, + "loss": 0.0831, + "step": 760 + }, + { + "epoch": 1.2534974818130946, + "grad_norm": 0.4629954993724823, + "learning_rate": 1.684199485134144e-05, + "loss": 0.1068, + "step": 770 + }, + { + "epoch": 1.2697766698885893, + "grad_norm": 0.6406750082969666, + "learning_rate": 1.6630758855389055e-05, + "loss": 0.1192, + "step": 780 + }, + { + "epoch": 1.286055857964084, + "grad_norm": 0.4982251226902008, + "learning_rate": 1.6418190141348485e-05, + "loss": 0.123, + "step": 790 + }, + { + "epoch": 1.3023350460395788, + "grad_norm": 0.5146717429161072, + "learning_rate": 1.6204357290848464e-05, + "loss": 0.0831, + "step": 800 + }, + { + "epoch": 1.3186142341150735, + "grad_norm": 0.4735712707042694, + "learning_rate": 1.5989329293369538e-05, + "loss": 0.0971, + "step": 810 + }, + { + "epoch": 1.3348934221905682, + "grad_norm": 0.7393200397491455, + "learning_rate": 1.5773175523985818e-05, + "loss": 0.0923, + "step": 820 + }, + { + "epoch": 1.3348934221905682, + "eval_loss": 0.22815725207328796, + "eval_runtime": 34.8794, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 820 + }, + { + "epoch": 1.351172610266063, + "grad_norm": 0.8956180214881897, + "learning_rate": 1.5555965720982284e-05, + "loss": 0.0817, + "step": 830 + }, + { + "epoch": 1.3674517983415577, + "grad_norm": 0.7423743009567261, + "learning_rate": 1.533776996335497e-05, + "loss": 0.1178, + "step": 840 + }, + { + "epoch": 1.3837309864170524, + "grad_norm": 0.7034802436828613, + "learning_rate": 1.5118658648201145e-05, + "loss": 0.1289, + "step": 850 + }, + { + "epoch": 1.400010174492547, + "grad_norm": 0.48646238446235657, + "learning_rate": 1.4898702468006922e-05, + "loss": 0.0839, + "step": 860 + }, + { + "epoch": 1.416289362568042, + "grad_norm": 0.28704097867012024, + "learning_rate": 1.4677972387839548e-05, + "loss": 0.0974, + "step": 870 + }, + { + "epoch": 1.4325685506435366, + "grad_norm": 0.674045205116272, + "learning_rate": 1.4456539622451748e-05, + "loss": 0.1006, + "step": 880 + }, + { + "epoch": 1.4488477387190315, + "grad_norm": 0.3513787090778351, + "learning_rate": 1.4234475613305509e-05, + "loss": 0.1104, + "step": 890 + }, + { + "epoch": 1.4651269267945262, + "grad_norm": 0.8029477596282959, + "learning_rate": 1.4011852005522727e-05, + "loss": 0.1131, + "step": 900 + }, + { + "epoch": 1.4814061148700208, + "grad_norm": 0.5420731902122498, + "learning_rate": 1.378874062477015e-05, + "loss": 0.0943, + "step": 910 + }, + { + "epoch": 1.4976853029455155, + "grad_norm": 0.7574429512023926, + "learning_rate": 1.3565213454086048e-05, + "loss": 0.1234, + "step": 920 + }, + { + "epoch": 1.5139644910210102, + "grad_norm": 0.5867305994033813, + "learning_rate": 1.3341342610656157e-05, + "loss": 0.1036, + "step": 930 + }, + { + "epoch": 1.530243679096505, + "grad_norm": 0.47744086384773254, + "learning_rate": 1.311720032254629e-05, + "loss": 0.1082, + "step": 940 + }, + { + "epoch": 1.546522867172, + "grad_norm": 0.6975990533828735, + "learning_rate": 1.289285890539919e-05, + "loss": 0.0967, + "step": 950 + }, + { + "epoch": 1.5628020552474946, + "grad_norm": 0.7781053781509399, + "learning_rate": 1.2668390739103172e-05, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 1.5790812433229893, + "grad_norm": 0.5423984527587891, + "learning_rate": 1.2443868244439958e-05, + "loss": 0.1085, + "step": 970 + }, + { + "epoch": 1.595360431398484, + "grad_norm": 0.5535146594047546, + "learning_rate": 1.2219363859719392e-05, + "loss": 0.0942, + "step": 980 + }, + { + "epoch": 1.6116396194739786, + "grad_norm": 0.30531561374664307, + "learning_rate": 1.1994950017408451e-05, + "loss": 0.0944, + "step": 990 + }, + { + "epoch": 1.6279188075494735, + "grad_norm": 0.7325620055198669, + "learning_rate": 1.1770699120762161e-05, + "loss": 0.1126, + "step": 1000 + }, + { + "epoch": 1.6441979956249682, + "grad_norm": 1.1568708419799805, + "learning_rate": 1.1546683520463961e-05, + "loss": 0.1073, + "step": 1010 + }, + { + "epoch": 1.660477183700463, + "grad_norm": 0.6926931142807007, + "learning_rate": 1.1322975491282961e-05, + "loss": 0.0825, + "step": 1020 + }, + { + "epoch": 1.6686167777382104, + "eval_loss": 0.22156645357608795, + "eval_runtime": 34.8778, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 1025 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.749744392064205e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1025/training_args.bin b/checkpoint-1025/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-1025/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-1230/README.md b/checkpoint-1230/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1230/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1230/adapter_config.json b/checkpoint-1230/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-1230/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1230/adapter_model.safetensors b/checkpoint-1230/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6e18e3a4fd74ebd66cf37c5d47c272a13e5a333 --- /dev/null +++ b/checkpoint-1230/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af419c5eafb0a4cfcb01ff0d5308d86173590fb28e2e01d585039a4b60a3d4a9 +size 54560368 diff --git a/checkpoint-1230/optimizer.pt b/checkpoint-1230/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f472206ec59218cf86ab8d7c6d9d0e62d9287616 --- /dev/null +++ b/checkpoint-1230/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c42633068b239bb9e174c997dfce5942895427dfa54a5979ab7a4fd86398d60 +size 109267450 diff --git a/checkpoint-1230/rng_state.pth b/checkpoint-1230/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c26e4bbf42fdf2c8a9cb24fcc101220bfcb5cb6e --- /dev/null +++ b/checkpoint-1230/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07bb37b633774cb05303aed37898fe0438d790bb8f5e2b166ad3a5babb7d448 +size 14244 diff --git a/checkpoint-1230/scheduler.pt b/checkpoint-1230/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d31ab24e67c5a7af6e8be4ae58c2cf137d1ceceb --- /dev/null +++ b/checkpoint-1230/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867c8961221afc97b2158a7291536dad426fc9a1783b28af0232e2c316fb2896 +size 1064 diff --git a/checkpoint-1230/special_tokens_map.json b/checkpoint-1230/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1230/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1230/tokenizer.json b/checkpoint-1230/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1230/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1230/tokenizer_config.json b/checkpoint-1230/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1230/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1230/trainer_state.json b/checkpoint-1230/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1f2b0b3f4aaf79727c6e007c949c235378a43e93 --- /dev/null +++ b/checkpoint-1230/trainer_state.json @@ -0,0 +1,942 @@ +{ + "best_metric": 0.21631866693496704, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-1230", + "epoch": 2.0023401332858524, + "eval_steps": 205, + "global_step": 1230, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + }, + { + "epoch": 1.0093096606806735, + "grad_norm": 0.5520356893539429, + "learning_rate": 1.980547431053456e-05, + "loss": 0.131, + "step": 620 + }, + { + "epoch": 1.0255888487561682, + "grad_norm": 0.6150078177452087, + "learning_rate": 1.9622115153168884e-05, + "loss": 0.1187, + "step": 630 + }, + { + "epoch": 1.041868036831663, + "grad_norm": 0.5100656151771545, + "learning_rate": 1.9436458168207117e-05, + "loss": 0.114, + "step": 640 + }, + { + "epoch": 1.0581472249071577, + "grad_norm": 0.5156052112579346, + "learning_rate": 1.9248563254672825e-05, + "loss": 0.1099, + "step": 650 + }, + { + "epoch": 1.0744264129826524, + "grad_norm": 0.4662775993347168, + "learning_rate": 1.9058491033618632e-05, + "loss": 0.1135, + "step": 660 + }, + { + "epoch": 1.0907056010581473, + "grad_norm": 0.4357255697250366, + "learning_rate": 1.886630282856787e-05, + "loss": 0.1036, + "step": 670 + }, + { + "epoch": 1.106984789133642, + "grad_norm": 0.3861764967441559, + "learning_rate": 1.867206064572962e-05, + "loss": 0.1145, + "step": 680 + }, + { + "epoch": 1.1232639772091366, + "grad_norm": 0.4562045633792877, + "learning_rate": 1.8475827153993447e-05, + "loss": 0.1107, + "step": 690 + }, + { + "epoch": 1.1395431652846315, + "grad_norm": 0.332917720079422, + "learning_rate": 1.8277665664710387e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 1.1558223533601262, + "grad_norm": 0.5971720814704895, + "learning_rate": 1.807764011126663e-05, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 1.1721015414356208, + "grad_norm": 0.6102172136306763, + "learning_rate": 1.787581502845651e-05, + "loss": 0.1046, + "step": 720 + }, + { + "epoch": 1.1883807295111157, + "grad_norm": 0.5294010043144226, + "learning_rate": 1.767225553166146e-05, + "loss": 0.1044, + "step": 730 + }, + { + "epoch": 1.2046599175866104, + "grad_norm": 0.5074148178100586, + "learning_rate": 1.7467027295841688e-05, + "loss": 0.1251, + "step": 740 + }, + { + "epoch": 1.220939105662105, + "grad_norm": 0.6349917650222778, + "learning_rate": 1.7260196534347235e-05, + "loss": 0.1037, + "step": 750 + }, + { + "epoch": 1.2372182937376, + "grad_norm": 0.34580153226852417, + "learning_rate": 1.7051829977555426e-05, + "loss": 0.0831, + "step": 760 + }, + { + "epoch": 1.2534974818130946, + "grad_norm": 0.4629954993724823, + "learning_rate": 1.684199485134144e-05, + "loss": 0.1068, + "step": 770 + }, + { + "epoch": 1.2697766698885893, + "grad_norm": 0.6406750082969666, + "learning_rate": 1.6630758855389055e-05, + "loss": 0.1192, + "step": 780 + }, + { + "epoch": 1.286055857964084, + "grad_norm": 0.4982251226902008, + "learning_rate": 1.6418190141348485e-05, + "loss": 0.123, + "step": 790 + }, + { + "epoch": 1.3023350460395788, + "grad_norm": 0.5146717429161072, + "learning_rate": 1.6204357290848464e-05, + "loss": 0.0831, + "step": 800 + }, + { + "epoch": 1.3186142341150735, + "grad_norm": 0.4735712707042694, + "learning_rate": 1.5989329293369538e-05, + "loss": 0.0971, + "step": 810 + }, + { + "epoch": 1.3348934221905682, + "grad_norm": 0.7393200397491455, + "learning_rate": 1.5773175523985818e-05, + "loss": 0.0923, + "step": 820 + }, + { + "epoch": 1.3348934221905682, + "eval_loss": 0.22815725207328796, + "eval_runtime": 34.8794, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 820 + }, + { + "epoch": 1.351172610266063, + "grad_norm": 0.8956180214881897, + "learning_rate": 1.5555965720982284e-05, + "loss": 0.0817, + "step": 830 + }, + { + "epoch": 1.3674517983415577, + "grad_norm": 0.7423743009567261, + "learning_rate": 1.533776996335497e-05, + "loss": 0.1178, + "step": 840 + }, + { + "epoch": 1.3837309864170524, + "grad_norm": 0.7034802436828613, + "learning_rate": 1.5118658648201145e-05, + "loss": 0.1289, + "step": 850 + }, + { + "epoch": 1.400010174492547, + "grad_norm": 0.48646238446235657, + "learning_rate": 1.4898702468006922e-05, + "loss": 0.0839, + "step": 860 + }, + { + "epoch": 1.416289362568042, + "grad_norm": 0.28704097867012024, + "learning_rate": 1.4677972387839548e-05, + "loss": 0.0974, + "step": 870 + }, + { + "epoch": 1.4325685506435366, + "grad_norm": 0.674045205116272, + "learning_rate": 1.4456539622451748e-05, + "loss": 0.1006, + "step": 880 + }, + { + "epoch": 1.4488477387190315, + "grad_norm": 0.3513787090778351, + "learning_rate": 1.4234475613305509e-05, + "loss": 0.1104, + "step": 890 + }, + { + "epoch": 1.4651269267945262, + "grad_norm": 0.8029477596282959, + "learning_rate": 1.4011852005522727e-05, + "loss": 0.1131, + "step": 900 + }, + { + "epoch": 1.4814061148700208, + "grad_norm": 0.5420731902122498, + "learning_rate": 1.378874062477015e-05, + "loss": 0.0943, + "step": 910 + }, + { + "epoch": 1.4976853029455155, + "grad_norm": 0.7574429512023926, + "learning_rate": 1.3565213454086048e-05, + "loss": 0.1234, + "step": 920 + }, + { + "epoch": 1.5139644910210102, + "grad_norm": 0.5867305994033813, + "learning_rate": 1.3341342610656157e-05, + "loss": 0.1036, + "step": 930 + }, + { + "epoch": 1.530243679096505, + "grad_norm": 0.47744086384773254, + "learning_rate": 1.311720032254629e-05, + "loss": 0.1082, + "step": 940 + }, + { + "epoch": 1.546522867172, + "grad_norm": 0.6975990533828735, + "learning_rate": 1.289285890539919e-05, + "loss": 0.0967, + "step": 950 + }, + { + "epoch": 1.5628020552474946, + "grad_norm": 0.7781053781509399, + "learning_rate": 1.2668390739103172e-05, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 1.5790812433229893, + "grad_norm": 0.5423984527587891, + "learning_rate": 1.2443868244439958e-05, + "loss": 0.1085, + "step": 970 + }, + { + "epoch": 1.595360431398484, + "grad_norm": 0.5535146594047546, + "learning_rate": 1.2219363859719392e-05, + "loss": 0.0942, + "step": 980 + }, + { + "epoch": 1.6116396194739786, + "grad_norm": 0.30531561374664307, + "learning_rate": 1.1994950017408451e-05, + "loss": 0.0944, + "step": 990 + }, + { + "epoch": 1.6279188075494735, + "grad_norm": 0.7325620055198669, + "learning_rate": 1.1770699120762161e-05, + "loss": 0.1126, + "step": 1000 + }, + { + "epoch": 1.6441979956249682, + "grad_norm": 1.1568708419799805, + "learning_rate": 1.1546683520463961e-05, + "loss": 0.1073, + "step": 1010 + }, + { + "epoch": 1.660477183700463, + "grad_norm": 0.6926931142807007, + "learning_rate": 1.1322975491282961e-05, + "loss": 0.0825, + "step": 1020 + }, + { + "epoch": 1.6686167777382104, + "eval_loss": 0.22156645357608795, + "eval_runtime": 34.8778, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 1025 + }, + { + "epoch": 1.6767563717759577, + "grad_norm": 0.41277509927749634, + "learning_rate": 1.1099647208755764e-05, + "loss": 0.0991, + "step": 1030 + }, + { + "epoch": 1.6930355598514524, + "grad_norm": 0.4389091730117798, + "learning_rate": 1.0876770725900265e-05, + "loss": 0.088, + "step": 1040 + }, + { + "epoch": 1.709314747926947, + "grad_norm": 0.48445749282836914, + "learning_rate": 1.0654417949968986e-05, + "loss": 0.1158, + "step": 1050 + }, + { + "epoch": 1.725593936002442, + "grad_norm": 0.6507833003997803, + "learning_rate": 1.0432660619249448e-05, + "loss": 0.1099, + "step": 1060 + }, + { + "epoch": 1.7418731240779366, + "grad_norm": 0.6933814883232117, + "learning_rate": 1.0211570279919044e-05, + "loss": 0.0757, + "step": 1070 + }, + { + "epoch": 1.7581523121534315, + "grad_norm": 0.7795721292495728, + "learning_rate": 9.991218262961901e-06, + "loss": 0.1017, + "step": 1080 + }, + { + "epoch": 1.7744315002289262, + "grad_norm": 0.594406008720398, + "learning_rate": 9.771675661155165e-06, + "loss": 0.1144, + "step": 1090 + }, + { + "epoch": 1.7907106883044208, + "grad_norm": 0.34790194034576416, + "learning_rate": 9.553013306132158e-06, + "loss": 0.0904, + "step": 1100 + }, + { + "epoch": 1.8069898763799155, + "grad_norm": 0.4349744915962219, + "learning_rate": 9.335301745529751e-06, + "loss": 0.1085, + "step": 1110 + }, + { + "epoch": 1.8232690644554101, + "grad_norm": 0.5773786306381226, + "learning_rate": 9.118611220227399e-06, + "loss": 0.1038, + "step": 1120 + }, + { + "epoch": 1.839548252530905, + "grad_norm": 0.4364662766456604, + "learning_rate": 8.903011641685128e-06, + "loss": 0.097, + "step": 1130 + }, + { + "epoch": 1.8558274406063997, + "grad_norm": 0.7753048539161682, + "learning_rate": 8.688572569387817e-06, + "loss": 0.1045, + "step": 1140 + }, + { + "epoch": 1.8721066286818946, + "grad_norm": 0.48441290855407715, + "learning_rate": 8.475363188403022e-06, + "loss": 0.095, + "step": 1150 + }, + { + "epoch": 1.8883858167573893, + "grad_norm": 0.6351140141487122, + "learning_rate": 8.263452287059607e-06, + "loss": 0.0977, + "step": 1160 + }, + { + "epoch": 1.904665004832884, + "grad_norm": 0.8837946057319641, + "learning_rate": 8.052908234754376e-06, + "loss": 0.0987, + "step": 1170 + }, + { + "epoch": 1.9209441929083786, + "grad_norm": 0.48196184635162354, + "learning_rate": 7.84379895989388e-06, + "loss": 0.088, + "step": 1180 + }, + { + "epoch": 1.9372233809838735, + "grad_norm": 0.5001464486122131, + "learning_rate": 7.636191927978465e-06, + "loss": 0.1161, + "step": 1190 + }, + { + "epoch": 1.9535025690593681, + "grad_norm": 0.6405985951423645, + "learning_rate": 7.430154119835716e-06, + "loss": 0.1023, + "step": 1200 + }, + { + "epoch": 1.969781757134863, + "grad_norm": 0.7047804594039917, + "learning_rate": 7.225752010010231e-06, + "loss": 0.1131, + "step": 1210 + }, + { + "epoch": 1.9860609452103577, + "grad_norm": 0.5221819281578064, + "learning_rate": 7.023051545316763e-06, + "loss": 0.0948, + "step": 1220 + }, + { + "epoch": 2.0023401332858524, + "grad_norm": 0.4171787202358246, + "learning_rate": 6.822118123563614e-06, + "loss": 0.0995, + "step": 1230 + }, + { + "epoch": 2.0023401332858524, + "eval_loss": 0.21631866693496704, + "eval_runtime": 34.8988, + "eval_samples_per_second": 5.416, + "eval_steps_per_second": 5.416, + "step": 1230 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0509548473594675e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1230/training_args.bin b/checkpoint-1230/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-1230/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-1435/README.md b/checkpoint-1435/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1435/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1435/adapter_config.json b/checkpoint-1435/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-1435/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1435/adapter_model.safetensors b/checkpoint-1435/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3874cbbfc5c795fd417e2194d8805342d364e2aa --- /dev/null +++ b/checkpoint-1435/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc51458577c95cb24e1181986225c0ea5734f13ab41ee1a3f25c848f224329d0 +size 54560368 diff --git a/checkpoint-1435/optimizer.pt b/checkpoint-1435/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7747b3da61813c8a0c66d4a243d49db3fd904c52 --- /dev/null +++ b/checkpoint-1435/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c938c8eabeaca07457a3134830025b0de8f5ed9cab2afc3012031199eeae6b57 +size 109267450 diff --git a/checkpoint-1435/rng_state.pth b/checkpoint-1435/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd6d7ce61bf966959e54f410e83a3b30dac14002 --- /dev/null +++ b/checkpoint-1435/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d802ca31d5b20b4026b35fe77a81e156f0011bc915f87b2cf9b2fc17e490270 +size 14244 diff --git a/checkpoint-1435/scheduler.pt b/checkpoint-1435/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76644ce7971f13593afb94a5459028a801b2b4ba --- /dev/null +++ b/checkpoint-1435/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77531cafbadf0e624c49ee4d65fcaeb0a396b8ec907da34ff2b81fc475d57522 +size 1064 diff --git a/checkpoint-1435/special_tokens_map.json b/checkpoint-1435/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1435/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1435/tokenizer.json b/checkpoint-1435/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1435/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1435/tokenizer_config.json b/checkpoint-1435/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1435/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1435/trainer_state.json b/checkpoint-1435/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0878b85af2caeca321bac2a274d346e5ac07cf8a --- /dev/null +++ b/checkpoint-1435/trainer_state.json @@ -0,0 +1,1090 @@ +{ + "best_metric": 0.21516536176204681, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-1435", + "epoch": 2.3360634888334944, + "eval_steps": 205, + "global_step": 1435, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + }, + { + "epoch": 1.0093096606806735, + "grad_norm": 0.5520356893539429, + "learning_rate": 1.980547431053456e-05, + "loss": 0.131, + "step": 620 + }, + { + "epoch": 1.0255888487561682, + "grad_norm": 0.6150078177452087, + "learning_rate": 1.9622115153168884e-05, + "loss": 0.1187, + "step": 630 + }, + { + "epoch": 1.041868036831663, + "grad_norm": 0.5100656151771545, + "learning_rate": 1.9436458168207117e-05, + "loss": 0.114, + "step": 640 + }, + { + "epoch": 1.0581472249071577, + "grad_norm": 0.5156052112579346, + "learning_rate": 1.9248563254672825e-05, + "loss": 0.1099, + "step": 650 + }, + { + "epoch": 1.0744264129826524, + "grad_norm": 0.4662775993347168, + "learning_rate": 1.9058491033618632e-05, + "loss": 0.1135, + "step": 660 + }, + { + "epoch": 1.0907056010581473, + "grad_norm": 0.4357255697250366, + "learning_rate": 1.886630282856787e-05, + "loss": 0.1036, + "step": 670 + }, + { + "epoch": 1.106984789133642, + "grad_norm": 0.3861764967441559, + "learning_rate": 1.867206064572962e-05, + "loss": 0.1145, + "step": 680 + }, + { + "epoch": 1.1232639772091366, + "grad_norm": 0.4562045633792877, + "learning_rate": 1.8475827153993447e-05, + "loss": 0.1107, + "step": 690 + }, + { + "epoch": 1.1395431652846315, + "grad_norm": 0.332917720079422, + "learning_rate": 1.8277665664710387e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 1.1558223533601262, + "grad_norm": 0.5971720814704895, + "learning_rate": 1.807764011126663e-05, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 1.1721015414356208, + "grad_norm": 0.6102172136306763, + "learning_rate": 1.787581502845651e-05, + "loss": 0.1046, + "step": 720 + }, + { + "epoch": 1.1883807295111157, + "grad_norm": 0.5294010043144226, + "learning_rate": 1.767225553166146e-05, + "loss": 0.1044, + "step": 730 + }, + { + "epoch": 1.2046599175866104, + "grad_norm": 0.5074148178100586, + "learning_rate": 1.7467027295841688e-05, + "loss": 0.1251, + "step": 740 + }, + { + "epoch": 1.220939105662105, + "grad_norm": 0.6349917650222778, + "learning_rate": 1.7260196534347235e-05, + "loss": 0.1037, + "step": 750 + }, + { + "epoch": 1.2372182937376, + "grad_norm": 0.34580153226852417, + "learning_rate": 1.7051829977555426e-05, + "loss": 0.0831, + "step": 760 + }, + { + "epoch": 1.2534974818130946, + "grad_norm": 0.4629954993724823, + "learning_rate": 1.684199485134144e-05, + "loss": 0.1068, + "step": 770 + }, + { + "epoch": 1.2697766698885893, + "grad_norm": 0.6406750082969666, + "learning_rate": 1.6630758855389055e-05, + "loss": 0.1192, + "step": 780 + }, + { + "epoch": 1.286055857964084, + "grad_norm": 0.4982251226902008, + "learning_rate": 1.6418190141348485e-05, + "loss": 0.123, + "step": 790 + }, + { + "epoch": 1.3023350460395788, + "grad_norm": 0.5146717429161072, + "learning_rate": 1.6204357290848464e-05, + "loss": 0.0831, + "step": 800 + }, + { + "epoch": 1.3186142341150735, + "grad_norm": 0.4735712707042694, + "learning_rate": 1.5989329293369538e-05, + "loss": 0.0971, + "step": 810 + }, + { + "epoch": 1.3348934221905682, + "grad_norm": 0.7393200397491455, + "learning_rate": 1.5773175523985818e-05, + "loss": 0.0923, + "step": 820 + }, + { + "epoch": 1.3348934221905682, + "eval_loss": 0.22815725207328796, + "eval_runtime": 34.8794, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 820 + }, + { + "epoch": 1.351172610266063, + "grad_norm": 0.8956180214881897, + "learning_rate": 1.5555965720982284e-05, + "loss": 0.0817, + "step": 830 + }, + { + "epoch": 1.3674517983415577, + "grad_norm": 0.7423743009567261, + "learning_rate": 1.533776996335497e-05, + "loss": 0.1178, + "step": 840 + }, + { + "epoch": 1.3837309864170524, + "grad_norm": 0.7034802436828613, + "learning_rate": 1.5118658648201145e-05, + "loss": 0.1289, + "step": 850 + }, + { + "epoch": 1.400010174492547, + "grad_norm": 0.48646238446235657, + "learning_rate": 1.4898702468006922e-05, + "loss": 0.0839, + "step": 860 + }, + { + "epoch": 1.416289362568042, + "grad_norm": 0.28704097867012024, + "learning_rate": 1.4677972387839548e-05, + "loss": 0.0974, + "step": 870 + }, + { + "epoch": 1.4325685506435366, + "grad_norm": 0.674045205116272, + "learning_rate": 1.4456539622451748e-05, + "loss": 0.1006, + "step": 880 + }, + { + "epoch": 1.4488477387190315, + "grad_norm": 0.3513787090778351, + "learning_rate": 1.4234475613305509e-05, + "loss": 0.1104, + "step": 890 + }, + { + "epoch": 1.4651269267945262, + "grad_norm": 0.8029477596282959, + "learning_rate": 1.4011852005522727e-05, + "loss": 0.1131, + "step": 900 + }, + { + "epoch": 1.4814061148700208, + "grad_norm": 0.5420731902122498, + "learning_rate": 1.378874062477015e-05, + "loss": 0.0943, + "step": 910 + }, + { + "epoch": 1.4976853029455155, + "grad_norm": 0.7574429512023926, + "learning_rate": 1.3565213454086048e-05, + "loss": 0.1234, + "step": 920 + }, + { + "epoch": 1.5139644910210102, + "grad_norm": 0.5867305994033813, + "learning_rate": 1.3341342610656157e-05, + "loss": 0.1036, + "step": 930 + }, + { + "epoch": 1.530243679096505, + "grad_norm": 0.47744086384773254, + "learning_rate": 1.311720032254629e-05, + "loss": 0.1082, + "step": 940 + }, + { + "epoch": 1.546522867172, + "grad_norm": 0.6975990533828735, + "learning_rate": 1.289285890539919e-05, + "loss": 0.0967, + "step": 950 + }, + { + "epoch": 1.5628020552474946, + "grad_norm": 0.7781053781509399, + "learning_rate": 1.2668390739103172e-05, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 1.5790812433229893, + "grad_norm": 0.5423984527587891, + "learning_rate": 1.2443868244439958e-05, + "loss": 0.1085, + "step": 970 + }, + { + "epoch": 1.595360431398484, + "grad_norm": 0.5535146594047546, + "learning_rate": 1.2219363859719392e-05, + "loss": 0.0942, + "step": 980 + }, + { + "epoch": 1.6116396194739786, + "grad_norm": 0.30531561374664307, + "learning_rate": 1.1994950017408451e-05, + "loss": 0.0944, + "step": 990 + }, + { + "epoch": 1.6279188075494735, + "grad_norm": 0.7325620055198669, + "learning_rate": 1.1770699120762161e-05, + "loss": 0.1126, + "step": 1000 + }, + { + "epoch": 1.6441979956249682, + "grad_norm": 1.1568708419799805, + "learning_rate": 1.1546683520463961e-05, + "loss": 0.1073, + "step": 1010 + }, + { + "epoch": 1.660477183700463, + "grad_norm": 0.6926931142807007, + "learning_rate": 1.1322975491282961e-05, + "loss": 0.0825, + "step": 1020 + }, + { + "epoch": 1.6686167777382104, + "eval_loss": 0.22156645357608795, + "eval_runtime": 34.8778, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 1025 + }, + { + "epoch": 1.6767563717759577, + "grad_norm": 0.41277509927749634, + "learning_rate": 1.1099647208755764e-05, + "loss": 0.0991, + "step": 1030 + }, + { + "epoch": 1.6930355598514524, + "grad_norm": 0.4389091730117798, + "learning_rate": 1.0876770725900265e-05, + "loss": 0.088, + "step": 1040 + }, + { + "epoch": 1.709314747926947, + "grad_norm": 0.48445749282836914, + "learning_rate": 1.0654417949968986e-05, + "loss": 0.1158, + "step": 1050 + }, + { + "epoch": 1.725593936002442, + "grad_norm": 0.6507833003997803, + "learning_rate": 1.0432660619249448e-05, + "loss": 0.1099, + "step": 1060 + }, + { + "epoch": 1.7418731240779366, + "grad_norm": 0.6933814883232117, + "learning_rate": 1.0211570279919044e-05, + "loss": 0.0757, + "step": 1070 + }, + { + "epoch": 1.7581523121534315, + "grad_norm": 0.7795721292495728, + "learning_rate": 9.991218262961901e-06, + "loss": 0.1017, + "step": 1080 + }, + { + "epoch": 1.7744315002289262, + "grad_norm": 0.594406008720398, + "learning_rate": 9.771675661155165e-06, + "loss": 0.1144, + "step": 1090 + }, + { + "epoch": 1.7907106883044208, + "grad_norm": 0.34790194034576416, + "learning_rate": 9.553013306132158e-06, + "loss": 0.0904, + "step": 1100 + }, + { + "epoch": 1.8069898763799155, + "grad_norm": 0.4349744915962219, + "learning_rate": 9.335301745529751e-06, + "loss": 0.1085, + "step": 1110 + }, + { + "epoch": 1.8232690644554101, + "grad_norm": 0.5773786306381226, + "learning_rate": 9.118611220227399e-06, + "loss": 0.1038, + "step": 1120 + }, + { + "epoch": 1.839548252530905, + "grad_norm": 0.4364662766456604, + "learning_rate": 8.903011641685128e-06, + "loss": 0.097, + "step": 1130 + }, + { + "epoch": 1.8558274406063997, + "grad_norm": 0.7753048539161682, + "learning_rate": 8.688572569387817e-06, + "loss": 0.1045, + "step": 1140 + }, + { + "epoch": 1.8721066286818946, + "grad_norm": 0.48441290855407715, + "learning_rate": 8.475363188403022e-06, + "loss": 0.095, + "step": 1150 + }, + { + "epoch": 1.8883858167573893, + "grad_norm": 0.6351140141487122, + "learning_rate": 8.263452287059607e-06, + "loss": 0.0977, + "step": 1160 + }, + { + "epoch": 1.904665004832884, + "grad_norm": 0.8837946057319641, + "learning_rate": 8.052908234754376e-06, + "loss": 0.0987, + "step": 1170 + }, + { + "epoch": 1.9209441929083786, + "grad_norm": 0.48196184635162354, + "learning_rate": 7.84379895989388e-06, + "loss": 0.088, + "step": 1180 + }, + { + "epoch": 1.9372233809838735, + "grad_norm": 0.5001464486122131, + "learning_rate": 7.636191927978465e-06, + "loss": 0.1161, + "step": 1190 + }, + { + "epoch": 1.9535025690593681, + "grad_norm": 0.6405985951423645, + "learning_rate": 7.430154119835716e-06, + "loss": 0.1023, + "step": 1200 + }, + { + "epoch": 1.969781757134863, + "grad_norm": 0.7047804594039917, + "learning_rate": 7.225752010010231e-06, + "loss": 0.1131, + "step": 1210 + }, + { + "epoch": 1.9860609452103577, + "grad_norm": 0.5221819281578064, + "learning_rate": 7.023051545316763e-06, + "loss": 0.0948, + "step": 1220 + }, + { + "epoch": 2.0023401332858524, + "grad_norm": 0.4171787202358246, + "learning_rate": 6.822118123563614e-06, + "loss": 0.0995, + "step": 1230 + }, + { + "epoch": 2.0023401332858524, + "eval_loss": 0.21631866693496704, + "eval_runtime": 34.8988, + "eval_samples_per_second": 5.416, + "eval_steps_per_second": 5.416, + "step": 1230 + }, + { + "epoch": 2.018619321361347, + "grad_norm": 0.7596387267112732, + "learning_rate": 6.623016572453172e-06, + "loss": 0.104, + "step": 1240 + }, + { + "epoch": 2.0348985094368417, + "grad_norm": 0.3702397346496582, + "learning_rate": 6.425811128666353e-06, + "loss": 0.0693, + "step": 1250 + }, + { + "epoch": 2.0511776975123364, + "grad_norm": 0.605099081993103, + "learning_rate": 6.230565417137758e-06, + "loss": 0.097, + "step": 1260 + }, + { + "epoch": 2.0674568855878315, + "grad_norm": 0.4555053412914276, + "learning_rate": 6.03734243052818e-06, + "loss": 0.0976, + "step": 1270 + }, + { + "epoch": 2.083736073663326, + "grad_norm": 0.7848448157310486, + "learning_rate": 5.8462045089011066e-06, + "loss": 0.1013, + "step": 1280 + }, + { + "epoch": 2.100015261738821, + "grad_norm": 0.6905212998390198, + "learning_rate": 5.657213319609776e-06, + "loss": 0.1094, + "step": 1290 + }, + { + "epoch": 2.1162944498143155, + "grad_norm": 0.5153264999389648, + "learning_rate": 5.4704298374012834e-06, + "loss": 0.0789, + "step": 1300 + }, + { + "epoch": 2.13257363788981, + "grad_norm": 0.8393344879150391, + "learning_rate": 5.2859143247441e-06, + "loss": 0.0904, + "step": 1310 + }, + { + "epoch": 2.148852825965305, + "grad_norm": 0.7440715432167053, + "learning_rate": 5.103726312385452e-06, + "loss": 0.0938, + "step": 1320 + }, + { + "epoch": 2.1651320140408, + "grad_norm": 0.8069117069244385, + "learning_rate": 4.923924580144743e-06, + "loss": 0.0908, + "step": 1330 + }, + { + "epoch": 2.1814112021162946, + "grad_norm": 0.5500065088272095, + "learning_rate": 4.746567137949261e-06, + "loss": 0.0976, + "step": 1340 + }, + { + "epoch": 2.1976903901917892, + "grad_norm": 0.51816725730896, + "learning_rate": 4.5717112071182715e-06, + "loss": 0.0889, + "step": 1350 + }, + { + "epoch": 2.213969578267284, + "grad_norm": 0.4226435124874115, + "learning_rate": 4.399413201901559e-06, + "loss": 0.0814, + "step": 1360 + }, + { + "epoch": 2.2302487663427786, + "grad_norm": 0.4923081398010254, + "learning_rate": 4.229728711278325e-06, + "loss": 0.086, + "step": 1370 + }, + { + "epoch": 2.2465279544182732, + "grad_norm": 0.5883035659790039, + "learning_rate": 4.062712481022371e-06, + "loss": 0.095, + "step": 1380 + }, + { + "epoch": 2.2628071424937684, + "grad_norm": 0.5114026069641113, + "learning_rate": 3.898418396039323e-06, + "loss": 0.1038, + "step": 1390 + }, + { + "epoch": 2.279086330569263, + "grad_norm": 0.5486142039299011, + "learning_rate": 3.7368994629815953e-06, + "loss": 0.0902, + "step": 1400 + }, + { + "epoch": 2.2953655186447577, + "grad_norm": 0.756912350654602, + "learning_rate": 3.5782077931467e-06, + "loss": 0.0706, + "step": 1410 + }, + { + "epoch": 2.3116447067202524, + "grad_norm": 0.6888672709465027, + "learning_rate": 3.42239458566444e-06, + "loss": 0.1065, + "step": 1420 + }, + { + "epoch": 2.327923894795747, + "grad_norm": 0.5472647547721863, + "learning_rate": 3.269510110978398e-06, + "loss": 0.0815, + "step": 1430 + }, + { + "epoch": 2.3360634888334944, + "eval_loss": 0.21516536176204681, + "eval_runtime": 34.891, + "eval_samples_per_second": 5.417, + "eval_steps_per_second": 5.417, + "step": 1435 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2262380146884608e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1435/training_args.bin b/checkpoint-1435/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-1435/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-1640/README.md b/checkpoint-1640/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1640/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1640/adapter_config.json b/checkpoint-1640/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-1640/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1640/adapter_model.safetensors b/checkpoint-1640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..850b8a7b7e98c250c049c275821793dc06f6bd0f --- /dev/null +++ b/checkpoint-1640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfa231e6a429050ed62c5c2fd8478b22f783cde633e267292cba7c756066ef2 +size 54560368 diff --git a/checkpoint-1640/optimizer.pt b/checkpoint-1640/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..770867f3d645a2064ce09d014a38f52237981a1a --- /dev/null +++ b/checkpoint-1640/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3887242a43bc644605c9075557c8ab010ed0be015364726743c997439baac73 +size 109267450 diff --git a/checkpoint-1640/rng_state.pth b/checkpoint-1640/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..73ee986d325bf2fe20e264620856d9ab93c39275 --- /dev/null +++ b/checkpoint-1640/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248b35902521c386687daaeaba32aabfad3a94e1c211319f38569cde3bdd7887 +size 14244 diff --git a/checkpoint-1640/scheduler.pt b/checkpoint-1640/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bf87ea34af7410a748f5a7c8d05d6de024d3f10 --- /dev/null +++ b/checkpoint-1640/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc40fde008cb44bdff17a66a35ca212cd851652843a238a67dde333a9593841 +size 1064 diff --git a/checkpoint-1640/special_tokens_map.json b/checkpoint-1640/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1640/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1640/tokenizer.json b/checkpoint-1640/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1640/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1640/tokenizer_config.json b/checkpoint-1640/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1640/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1640/trainer_state.json b/checkpoint-1640/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ea6992b1dea59881e3058cc0fd0cbcd5c04e6a45 --- /dev/null +++ b/checkpoint-1640/trainer_state.json @@ -0,0 +1,1245 @@ +{ + "best_metric": 0.21331782639026642, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-1640", + "epoch": 2.6697868443811363, + "eval_steps": 205, + "global_step": 1640, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + }, + { + "epoch": 1.0093096606806735, + "grad_norm": 0.5520356893539429, + "learning_rate": 1.980547431053456e-05, + "loss": 0.131, + "step": 620 + }, + { + "epoch": 1.0255888487561682, + "grad_norm": 0.6150078177452087, + "learning_rate": 1.9622115153168884e-05, + "loss": 0.1187, + "step": 630 + }, + { + "epoch": 1.041868036831663, + "grad_norm": 0.5100656151771545, + "learning_rate": 1.9436458168207117e-05, + "loss": 0.114, + "step": 640 + }, + { + "epoch": 1.0581472249071577, + "grad_norm": 0.5156052112579346, + "learning_rate": 1.9248563254672825e-05, + "loss": 0.1099, + "step": 650 + }, + { + "epoch": 1.0744264129826524, + "grad_norm": 0.4662775993347168, + "learning_rate": 1.9058491033618632e-05, + "loss": 0.1135, + "step": 660 + }, + { + "epoch": 1.0907056010581473, + "grad_norm": 0.4357255697250366, + "learning_rate": 1.886630282856787e-05, + "loss": 0.1036, + "step": 670 + }, + { + "epoch": 1.106984789133642, + "grad_norm": 0.3861764967441559, + "learning_rate": 1.867206064572962e-05, + "loss": 0.1145, + "step": 680 + }, + { + "epoch": 1.1232639772091366, + "grad_norm": 0.4562045633792877, + "learning_rate": 1.8475827153993447e-05, + "loss": 0.1107, + "step": 690 + }, + { + "epoch": 1.1395431652846315, + "grad_norm": 0.332917720079422, + "learning_rate": 1.8277665664710387e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 1.1558223533601262, + "grad_norm": 0.5971720814704895, + "learning_rate": 1.807764011126663e-05, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 1.1721015414356208, + "grad_norm": 0.6102172136306763, + "learning_rate": 1.787581502845651e-05, + "loss": 0.1046, + "step": 720 + }, + { + "epoch": 1.1883807295111157, + "grad_norm": 0.5294010043144226, + "learning_rate": 1.767225553166146e-05, + "loss": 0.1044, + "step": 730 + }, + { + "epoch": 1.2046599175866104, + "grad_norm": 0.5074148178100586, + "learning_rate": 1.7467027295841688e-05, + "loss": 0.1251, + "step": 740 + }, + { + "epoch": 1.220939105662105, + "grad_norm": 0.6349917650222778, + "learning_rate": 1.7260196534347235e-05, + "loss": 0.1037, + "step": 750 + }, + { + "epoch": 1.2372182937376, + "grad_norm": 0.34580153226852417, + "learning_rate": 1.7051829977555426e-05, + "loss": 0.0831, + "step": 760 + }, + { + "epoch": 1.2534974818130946, + "grad_norm": 0.4629954993724823, + "learning_rate": 1.684199485134144e-05, + "loss": 0.1068, + "step": 770 + }, + { + "epoch": 1.2697766698885893, + "grad_norm": 0.6406750082969666, + "learning_rate": 1.6630758855389055e-05, + "loss": 0.1192, + "step": 780 + }, + { + "epoch": 1.286055857964084, + "grad_norm": 0.4982251226902008, + "learning_rate": 1.6418190141348485e-05, + "loss": 0.123, + "step": 790 + }, + { + "epoch": 1.3023350460395788, + "grad_norm": 0.5146717429161072, + "learning_rate": 1.6204357290848464e-05, + "loss": 0.0831, + "step": 800 + }, + { + "epoch": 1.3186142341150735, + "grad_norm": 0.4735712707042694, + "learning_rate": 1.5989329293369538e-05, + "loss": 0.0971, + "step": 810 + }, + { + "epoch": 1.3348934221905682, + "grad_norm": 0.7393200397491455, + "learning_rate": 1.5773175523985818e-05, + "loss": 0.0923, + "step": 820 + }, + { + "epoch": 1.3348934221905682, + "eval_loss": 0.22815725207328796, + "eval_runtime": 34.8794, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 820 + }, + { + "epoch": 1.351172610266063, + "grad_norm": 0.8956180214881897, + "learning_rate": 1.5555965720982284e-05, + "loss": 0.0817, + "step": 830 + }, + { + "epoch": 1.3674517983415577, + "grad_norm": 0.7423743009567261, + "learning_rate": 1.533776996335497e-05, + "loss": 0.1178, + "step": 840 + }, + { + "epoch": 1.3837309864170524, + "grad_norm": 0.7034802436828613, + "learning_rate": 1.5118658648201145e-05, + "loss": 0.1289, + "step": 850 + }, + { + "epoch": 1.400010174492547, + "grad_norm": 0.48646238446235657, + "learning_rate": 1.4898702468006922e-05, + "loss": 0.0839, + "step": 860 + }, + { + "epoch": 1.416289362568042, + "grad_norm": 0.28704097867012024, + "learning_rate": 1.4677972387839548e-05, + "loss": 0.0974, + "step": 870 + }, + { + "epoch": 1.4325685506435366, + "grad_norm": 0.674045205116272, + "learning_rate": 1.4456539622451748e-05, + "loss": 0.1006, + "step": 880 + }, + { + "epoch": 1.4488477387190315, + "grad_norm": 0.3513787090778351, + "learning_rate": 1.4234475613305509e-05, + "loss": 0.1104, + "step": 890 + }, + { + "epoch": 1.4651269267945262, + "grad_norm": 0.8029477596282959, + "learning_rate": 1.4011852005522727e-05, + "loss": 0.1131, + "step": 900 + }, + { + "epoch": 1.4814061148700208, + "grad_norm": 0.5420731902122498, + "learning_rate": 1.378874062477015e-05, + "loss": 0.0943, + "step": 910 + }, + { + "epoch": 1.4976853029455155, + "grad_norm": 0.7574429512023926, + "learning_rate": 1.3565213454086048e-05, + "loss": 0.1234, + "step": 920 + }, + { + "epoch": 1.5139644910210102, + "grad_norm": 0.5867305994033813, + "learning_rate": 1.3341342610656157e-05, + "loss": 0.1036, + "step": 930 + }, + { + "epoch": 1.530243679096505, + "grad_norm": 0.47744086384773254, + "learning_rate": 1.311720032254629e-05, + "loss": 0.1082, + "step": 940 + }, + { + "epoch": 1.546522867172, + "grad_norm": 0.6975990533828735, + "learning_rate": 1.289285890539919e-05, + "loss": 0.0967, + "step": 950 + }, + { + "epoch": 1.5628020552474946, + "grad_norm": 0.7781053781509399, + "learning_rate": 1.2668390739103172e-05, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 1.5790812433229893, + "grad_norm": 0.5423984527587891, + "learning_rate": 1.2443868244439958e-05, + "loss": 0.1085, + "step": 970 + }, + { + "epoch": 1.595360431398484, + "grad_norm": 0.5535146594047546, + "learning_rate": 1.2219363859719392e-05, + "loss": 0.0942, + "step": 980 + }, + { + "epoch": 1.6116396194739786, + "grad_norm": 0.30531561374664307, + "learning_rate": 1.1994950017408451e-05, + "loss": 0.0944, + "step": 990 + }, + { + "epoch": 1.6279188075494735, + "grad_norm": 0.7325620055198669, + "learning_rate": 1.1770699120762161e-05, + "loss": 0.1126, + "step": 1000 + }, + { + "epoch": 1.6441979956249682, + "grad_norm": 1.1568708419799805, + "learning_rate": 1.1546683520463961e-05, + "loss": 0.1073, + "step": 1010 + }, + { + "epoch": 1.660477183700463, + "grad_norm": 0.6926931142807007, + "learning_rate": 1.1322975491282961e-05, + "loss": 0.0825, + "step": 1020 + }, + { + "epoch": 1.6686167777382104, + "eval_loss": 0.22156645357608795, + "eval_runtime": 34.8778, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 1025 + }, + { + "epoch": 1.6767563717759577, + "grad_norm": 0.41277509927749634, + "learning_rate": 1.1099647208755764e-05, + "loss": 0.0991, + "step": 1030 + }, + { + "epoch": 1.6930355598514524, + "grad_norm": 0.4389091730117798, + "learning_rate": 1.0876770725900265e-05, + "loss": 0.088, + "step": 1040 + }, + { + "epoch": 1.709314747926947, + "grad_norm": 0.48445749282836914, + "learning_rate": 1.0654417949968986e-05, + "loss": 0.1158, + "step": 1050 + }, + { + "epoch": 1.725593936002442, + "grad_norm": 0.6507833003997803, + "learning_rate": 1.0432660619249448e-05, + "loss": 0.1099, + "step": 1060 + }, + { + "epoch": 1.7418731240779366, + "grad_norm": 0.6933814883232117, + "learning_rate": 1.0211570279919044e-05, + "loss": 0.0757, + "step": 1070 + }, + { + "epoch": 1.7581523121534315, + "grad_norm": 0.7795721292495728, + "learning_rate": 9.991218262961901e-06, + "loss": 0.1017, + "step": 1080 + }, + { + "epoch": 1.7744315002289262, + "grad_norm": 0.594406008720398, + "learning_rate": 9.771675661155165e-06, + "loss": 0.1144, + "step": 1090 + }, + { + "epoch": 1.7907106883044208, + "grad_norm": 0.34790194034576416, + "learning_rate": 9.553013306132158e-06, + "loss": 0.0904, + "step": 1100 + }, + { + "epoch": 1.8069898763799155, + "grad_norm": 0.4349744915962219, + "learning_rate": 9.335301745529751e-06, + "loss": 0.1085, + "step": 1110 + }, + { + "epoch": 1.8232690644554101, + "grad_norm": 0.5773786306381226, + "learning_rate": 9.118611220227399e-06, + "loss": 0.1038, + "step": 1120 + }, + { + "epoch": 1.839548252530905, + "grad_norm": 0.4364662766456604, + "learning_rate": 8.903011641685128e-06, + "loss": 0.097, + "step": 1130 + }, + { + "epoch": 1.8558274406063997, + "grad_norm": 0.7753048539161682, + "learning_rate": 8.688572569387817e-06, + "loss": 0.1045, + "step": 1140 + }, + { + "epoch": 1.8721066286818946, + "grad_norm": 0.48441290855407715, + "learning_rate": 8.475363188403022e-06, + "loss": 0.095, + "step": 1150 + }, + { + "epoch": 1.8883858167573893, + "grad_norm": 0.6351140141487122, + "learning_rate": 8.263452287059607e-06, + "loss": 0.0977, + "step": 1160 + }, + { + "epoch": 1.904665004832884, + "grad_norm": 0.8837946057319641, + "learning_rate": 8.052908234754376e-06, + "loss": 0.0987, + "step": 1170 + }, + { + "epoch": 1.9209441929083786, + "grad_norm": 0.48196184635162354, + "learning_rate": 7.84379895989388e-06, + "loss": 0.088, + "step": 1180 + }, + { + "epoch": 1.9372233809838735, + "grad_norm": 0.5001464486122131, + "learning_rate": 7.636191927978465e-06, + "loss": 0.1161, + "step": 1190 + }, + { + "epoch": 1.9535025690593681, + "grad_norm": 0.6405985951423645, + "learning_rate": 7.430154119835716e-06, + "loss": 0.1023, + "step": 1200 + }, + { + "epoch": 1.969781757134863, + "grad_norm": 0.7047804594039917, + "learning_rate": 7.225752010010231e-06, + "loss": 0.1131, + "step": 1210 + }, + { + "epoch": 1.9860609452103577, + "grad_norm": 0.5221819281578064, + "learning_rate": 7.023051545316763e-06, + "loss": 0.0948, + "step": 1220 + }, + { + "epoch": 2.0023401332858524, + "grad_norm": 0.4171787202358246, + "learning_rate": 6.822118123563614e-06, + "loss": 0.0995, + "step": 1230 + }, + { + "epoch": 2.0023401332858524, + "eval_loss": 0.21631866693496704, + "eval_runtime": 34.8988, + "eval_samples_per_second": 5.416, + "eval_steps_per_second": 5.416, + "step": 1230 + }, + { + "epoch": 2.018619321361347, + "grad_norm": 0.7596387267112732, + "learning_rate": 6.623016572453172e-06, + "loss": 0.104, + "step": 1240 + }, + { + "epoch": 2.0348985094368417, + "grad_norm": 0.3702397346496582, + "learning_rate": 6.425811128666353e-06, + "loss": 0.0693, + "step": 1250 + }, + { + "epoch": 2.0511776975123364, + "grad_norm": 0.605099081993103, + "learning_rate": 6.230565417137758e-06, + "loss": 0.097, + "step": 1260 + }, + { + "epoch": 2.0674568855878315, + "grad_norm": 0.4555053412914276, + "learning_rate": 6.03734243052818e-06, + "loss": 0.0976, + "step": 1270 + }, + { + "epoch": 2.083736073663326, + "grad_norm": 0.7848448157310486, + "learning_rate": 5.8462045089011066e-06, + "loss": 0.1013, + "step": 1280 + }, + { + "epoch": 2.100015261738821, + "grad_norm": 0.6905212998390198, + "learning_rate": 5.657213319609776e-06, + "loss": 0.1094, + "step": 1290 + }, + { + "epoch": 2.1162944498143155, + "grad_norm": 0.5153264999389648, + "learning_rate": 5.4704298374012834e-06, + "loss": 0.0789, + "step": 1300 + }, + { + "epoch": 2.13257363788981, + "grad_norm": 0.8393344879150391, + "learning_rate": 5.2859143247441e-06, + "loss": 0.0904, + "step": 1310 + }, + { + "epoch": 2.148852825965305, + "grad_norm": 0.7440715432167053, + "learning_rate": 5.103726312385452e-06, + "loss": 0.0938, + "step": 1320 + }, + { + "epoch": 2.1651320140408, + "grad_norm": 0.8069117069244385, + "learning_rate": 4.923924580144743e-06, + "loss": 0.0908, + "step": 1330 + }, + { + "epoch": 2.1814112021162946, + "grad_norm": 0.5500065088272095, + "learning_rate": 4.746567137949261e-06, + "loss": 0.0976, + "step": 1340 + }, + { + "epoch": 2.1976903901917892, + "grad_norm": 0.51816725730896, + "learning_rate": 4.5717112071182715e-06, + "loss": 0.0889, + "step": 1350 + }, + { + "epoch": 2.213969578267284, + "grad_norm": 0.4226435124874115, + "learning_rate": 4.399413201901559e-06, + "loss": 0.0814, + "step": 1360 + }, + { + "epoch": 2.2302487663427786, + "grad_norm": 0.4923081398010254, + "learning_rate": 4.229728711278325e-06, + "loss": 0.086, + "step": 1370 + }, + { + "epoch": 2.2465279544182732, + "grad_norm": 0.5883035659790039, + "learning_rate": 4.062712481022371e-06, + "loss": 0.095, + "step": 1380 + }, + { + "epoch": 2.2628071424937684, + "grad_norm": 0.5114026069641113, + "learning_rate": 3.898418396039323e-06, + "loss": 0.1038, + "step": 1390 + }, + { + "epoch": 2.279086330569263, + "grad_norm": 0.5486142039299011, + "learning_rate": 3.7368994629815953e-06, + "loss": 0.0902, + "step": 1400 + }, + { + "epoch": 2.2953655186447577, + "grad_norm": 0.756912350654602, + "learning_rate": 3.5782077931467e-06, + "loss": 0.0706, + "step": 1410 + }, + { + "epoch": 2.3116447067202524, + "grad_norm": 0.6888672709465027, + "learning_rate": 3.42239458566444e-06, + "loss": 0.1065, + "step": 1420 + }, + { + "epoch": 2.327923894795747, + "grad_norm": 0.5472647547721863, + "learning_rate": 3.269510110978398e-06, + "loss": 0.0815, + "step": 1430 + }, + { + "epoch": 2.3360634888334944, + "eval_loss": 0.21516536176204681, + "eval_runtime": 34.891, + "eval_samples_per_second": 5.417, + "eval_steps_per_second": 5.417, + "step": 1435 + }, + { + "epoch": 2.3442030828712417, + "grad_norm": 0.5613276958465576, + "learning_rate": 3.119603694627042e-06, + "loss": 0.0923, + "step": 1440 + }, + { + "epoch": 2.3604822709467363, + "grad_norm": 0.8540468811988831, + "learning_rate": 2.9727237013296854e-06, + "loss": 0.1192, + "step": 1450 + }, + { + "epoch": 2.3767614590222315, + "grad_norm": 0.7269755005836487, + "learning_rate": 2.828917519382457e-06, + "loss": 0.0889, + "step": 1460 + }, + { + "epoch": 2.393040647097726, + "grad_norm": 0.6140917539596558, + "learning_rate": 2.6882315453692686e-06, + "loss": 0.0936, + "step": 1470 + }, + { + "epoch": 2.409319835173221, + "grad_norm": 0.4730454981327057, + "learning_rate": 2.550711169192775e-06, + "loss": 0.0976, + "step": 1480 + }, + { + "epoch": 2.4255990232487155, + "grad_norm": 0.5974939465522766, + "learning_rate": 2.4164007594300875e-06, + "loss": 0.0913, + "step": 1490 + }, + { + "epoch": 2.44187821132421, + "grad_norm": 0.6668256521224976, + "learning_rate": 2.2853436490180374e-06, + "loss": 0.0982, + "step": 1500 + }, + { + "epoch": 2.458157399399705, + "grad_norm": 0.6182997226715088, + "learning_rate": 2.1575821212725334e-06, + "loss": 0.0861, + "step": 1510 + }, + { + "epoch": 2.4744365874752, + "grad_norm": 0.5460255146026611, + "learning_rate": 2.0331573962465864e-06, + "loss": 0.086, + "step": 1520 + }, + { + "epoch": 2.4907157755506946, + "grad_norm": 0.6361858248710632, + "learning_rate": 1.912109617431372e-06, + "loss": 0.0911, + "step": 1530 + }, + { + "epoch": 2.5069949636261892, + "grad_norm": 0.8699812889099121, + "learning_rate": 1.7944778388046243e-06, + "loss": 0.0884, + "step": 1540 + }, + { + "epoch": 2.523274151701684, + "grad_norm": 0.5886068344116211, + "learning_rate": 1.680300012230543e-06, + "loss": 0.1027, + "step": 1550 + }, + { + "epoch": 2.5395533397771786, + "grad_norm": 0.6138848066329956, + "learning_rate": 1.5696129752152774e-06, + "loss": 0.0939, + "step": 1560 + }, + { + "epoch": 2.5558325278526732, + "grad_norm": 0.7268607020378113, + "learning_rate": 1.4624524390219455e-06, + "loss": 0.083, + "step": 1570 + }, + { + "epoch": 2.572111715928168, + "grad_norm": 0.619888961315155, + "learning_rate": 1.3588529771490054e-06, + "loss": 0.1087, + "step": 1580 + }, + { + "epoch": 2.5883909040036626, + "grad_norm": 0.5299406051635742, + "learning_rate": 1.2588480141757204e-06, + "loss": 0.0997, + "step": 1590 + }, + { + "epoch": 2.6046700920791577, + "grad_norm": 0.6051465272903442, + "learning_rate": 1.1624698149782842e-06, + "loss": 0.0953, + "step": 1600 + }, + { + "epoch": 2.6209492801546523, + "grad_norm": 0.6585546135902405, + "learning_rate": 1.0697494743201226e-06, + "loss": 0.1057, + "step": 1610 + }, + { + "epoch": 2.637228468230147, + "grad_norm": 0.5243381261825562, + "learning_rate": 9.807169068197008e-07, + "loss": 0.09, + "step": 1620 + }, + { + "epoch": 2.6535076563056417, + "grad_norm": 0.6636092066764832, + "learning_rate": 8.95400837299093e-07, + "loss": 0.061, + "step": 1630 + }, + { + "epoch": 2.6697868443811363, + "grad_norm": 0.6529124975204468, + "learning_rate": 8.138287915164078e-07, + "loss": 0.0897, + "step": 1640 + }, + { + "epoch": 2.6697868443811363, + "eval_loss": 0.21331782639026642, + "eval_runtime": 34.9348, + "eval_samples_per_second": 5.41, + "eval_steps_per_second": 5.41, + "step": 1640 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4019856483427942e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1640/training_args.bin b/checkpoint-1640/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-1640/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-1842/README.md b/checkpoint-1842/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1842/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1842/adapter_config.json b/checkpoint-1842/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-1842/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1842/adapter_model.safetensors b/checkpoint-1842/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83cbd30c9ee1f421cabf57047de9985e0f00f268 --- /dev/null +++ b/checkpoint-1842/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b0679533d285735d22e9dbf5f075d7149feee837f8bf4ca7b928dcb02622a6 +size 54560368 diff --git a/checkpoint-1842/optimizer.pt b/checkpoint-1842/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf7bb834cb6c27a27f1e0f035f9a7c78d8da2eb6 --- /dev/null +++ b/checkpoint-1842/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe1e4c55ca39bb4e6a348629b1026e92b52d387770012728a6f3bd00ab8dc8e +size 109267450 diff --git a/checkpoint-1842/rng_state.pth b/checkpoint-1842/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..58cf8048c63b3571e40f1717389bbf0bda5eabf7 --- /dev/null +++ b/checkpoint-1842/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da6703d0afae56330e498e4911bb572ed893abe5de4aa0b1476dbdbf19c43f2 +size 14244 diff --git a/checkpoint-1842/scheduler.pt b/checkpoint-1842/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..33bddf6324633f009254fbbce688857981ec6461 --- /dev/null +++ b/checkpoint-1842/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16613f52b29f01fc3ecf484f43545b47fd6a8dc2032cf7cda19c010b87f5a885 +size 1064 diff --git a/checkpoint-1842/special_tokens_map.json b/checkpoint-1842/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1842/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1842/tokenizer.json b/checkpoint-1842/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1842/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1842/tokenizer_config.json b/checkpoint-1842/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1842/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1842/trainer_state.json b/checkpoint-1842/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7e26df15734e91444daba55706b1d7ecd59c3621 --- /dev/null +++ b/checkpoint-1842/trainer_state.json @@ -0,0 +1,1385 @@ +{ + "best_metric": 0.21331782639026642, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-1640", + "epoch": 2.9986264435061303, + "eval_steps": 205, + "global_step": 1842, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + }, + { + "epoch": 1.0093096606806735, + "grad_norm": 0.5520356893539429, + "learning_rate": 1.980547431053456e-05, + "loss": 0.131, + "step": 620 + }, + { + "epoch": 1.0255888487561682, + "grad_norm": 0.6150078177452087, + "learning_rate": 1.9622115153168884e-05, + "loss": 0.1187, + "step": 630 + }, + { + "epoch": 1.041868036831663, + "grad_norm": 0.5100656151771545, + "learning_rate": 1.9436458168207117e-05, + "loss": 0.114, + "step": 640 + }, + { + "epoch": 1.0581472249071577, + "grad_norm": 0.5156052112579346, + "learning_rate": 1.9248563254672825e-05, + "loss": 0.1099, + "step": 650 + }, + { + "epoch": 1.0744264129826524, + "grad_norm": 0.4662775993347168, + "learning_rate": 1.9058491033618632e-05, + "loss": 0.1135, + "step": 660 + }, + { + "epoch": 1.0907056010581473, + "grad_norm": 0.4357255697250366, + "learning_rate": 1.886630282856787e-05, + "loss": 0.1036, + "step": 670 + }, + { + "epoch": 1.106984789133642, + "grad_norm": 0.3861764967441559, + "learning_rate": 1.867206064572962e-05, + "loss": 0.1145, + "step": 680 + }, + { + "epoch": 1.1232639772091366, + "grad_norm": 0.4562045633792877, + "learning_rate": 1.8475827153993447e-05, + "loss": 0.1107, + "step": 690 + }, + { + "epoch": 1.1395431652846315, + "grad_norm": 0.332917720079422, + "learning_rate": 1.8277665664710387e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 1.1558223533601262, + "grad_norm": 0.5971720814704895, + "learning_rate": 1.807764011126663e-05, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 1.1721015414356208, + "grad_norm": 0.6102172136306763, + "learning_rate": 1.787581502845651e-05, + "loss": 0.1046, + "step": 720 + }, + { + "epoch": 1.1883807295111157, + "grad_norm": 0.5294010043144226, + "learning_rate": 1.767225553166146e-05, + "loss": 0.1044, + "step": 730 + }, + { + "epoch": 1.2046599175866104, + "grad_norm": 0.5074148178100586, + "learning_rate": 1.7467027295841688e-05, + "loss": 0.1251, + "step": 740 + }, + { + "epoch": 1.220939105662105, + "grad_norm": 0.6349917650222778, + "learning_rate": 1.7260196534347235e-05, + "loss": 0.1037, + "step": 750 + }, + { + "epoch": 1.2372182937376, + "grad_norm": 0.34580153226852417, + "learning_rate": 1.7051829977555426e-05, + "loss": 0.0831, + "step": 760 + }, + { + "epoch": 1.2534974818130946, + "grad_norm": 0.4629954993724823, + "learning_rate": 1.684199485134144e-05, + "loss": 0.1068, + "step": 770 + }, + { + "epoch": 1.2697766698885893, + "grad_norm": 0.6406750082969666, + "learning_rate": 1.6630758855389055e-05, + "loss": 0.1192, + "step": 780 + }, + { + "epoch": 1.286055857964084, + "grad_norm": 0.4982251226902008, + "learning_rate": 1.6418190141348485e-05, + "loss": 0.123, + "step": 790 + }, + { + "epoch": 1.3023350460395788, + "grad_norm": 0.5146717429161072, + "learning_rate": 1.6204357290848464e-05, + "loss": 0.0831, + "step": 800 + }, + { + "epoch": 1.3186142341150735, + "grad_norm": 0.4735712707042694, + "learning_rate": 1.5989329293369538e-05, + "loss": 0.0971, + "step": 810 + }, + { + "epoch": 1.3348934221905682, + "grad_norm": 0.7393200397491455, + "learning_rate": 1.5773175523985818e-05, + "loss": 0.0923, + "step": 820 + }, + { + "epoch": 1.3348934221905682, + "eval_loss": 0.22815725207328796, + "eval_runtime": 34.8794, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 820 + }, + { + "epoch": 1.351172610266063, + "grad_norm": 0.8956180214881897, + "learning_rate": 1.5555965720982284e-05, + "loss": 0.0817, + "step": 830 + }, + { + "epoch": 1.3674517983415577, + "grad_norm": 0.7423743009567261, + "learning_rate": 1.533776996335497e-05, + "loss": 0.1178, + "step": 840 + }, + { + "epoch": 1.3837309864170524, + "grad_norm": 0.7034802436828613, + "learning_rate": 1.5118658648201145e-05, + "loss": 0.1289, + "step": 850 + }, + { + "epoch": 1.400010174492547, + "grad_norm": 0.48646238446235657, + "learning_rate": 1.4898702468006922e-05, + "loss": 0.0839, + "step": 860 + }, + { + "epoch": 1.416289362568042, + "grad_norm": 0.28704097867012024, + "learning_rate": 1.4677972387839548e-05, + "loss": 0.0974, + "step": 870 + }, + { + "epoch": 1.4325685506435366, + "grad_norm": 0.674045205116272, + "learning_rate": 1.4456539622451748e-05, + "loss": 0.1006, + "step": 880 + }, + { + "epoch": 1.4488477387190315, + "grad_norm": 0.3513787090778351, + "learning_rate": 1.4234475613305509e-05, + "loss": 0.1104, + "step": 890 + }, + { + "epoch": 1.4651269267945262, + "grad_norm": 0.8029477596282959, + "learning_rate": 1.4011852005522727e-05, + "loss": 0.1131, + "step": 900 + }, + { + "epoch": 1.4814061148700208, + "grad_norm": 0.5420731902122498, + "learning_rate": 1.378874062477015e-05, + "loss": 0.0943, + "step": 910 + }, + { + "epoch": 1.4976853029455155, + "grad_norm": 0.7574429512023926, + "learning_rate": 1.3565213454086048e-05, + "loss": 0.1234, + "step": 920 + }, + { + "epoch": 1.5139644910210102, + "grad_norm": 0.5867305994033813, + "learning_rate": 1.3341342610656157e-05, + "loss": 0.1036, + "step": 930 + }, + { + "epoch": 1.530243679096505, + "grad_norm": 0.47744086384773254, + "learning_rate": 1.311720032254629e-05, + "loss": 0.1082, + "step": 940 + }, + { + "epoch": 1.546522867172, + "grad_norm": 0.6975990533828735, + "learning_rate": 1.289285890539919e-05, + "loss": 0.0967, + "step": 950 + }, + { + "epoch": 1.5628020552474946, + "grad_norm": 0.7781053781509399, + "learning_rate": 1.2668390739103172e-05, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 1.5790812433229893, + "grad_norm": 0.5423984527587891, + "learning_rate": 1.2443868244439958e-05, + "loss": 0.1085, + "step": 970 + }, + { + "epoch": 1.595360431398484, + "grad_norm": 0.5535146594047546, + "learning_rate": 1.2219363859719392e-05, + "loss": 0.0942, + "step": 980 + }, + { + "epoch": 1.6116396194739786, + "grad_norm": 0.30531561374664307, + "learning_rate": 1.1994950017408451e-05, + "loss": 0.0944, + "step": 990 + }, + { + "epoch": 1.6279188075494735, + "grad_norm": 0.7325620055198669, + "learning_rate": 1.1770699120762161e-05, + "loss": 0.1126, + "step": 1000 + }, + { + "epoch": 1.6441979956249682, + "grad_norm": 1.1568708419799805, + "learning_rate": 1.1546683520463961e-05, + "loss": 0.1073, + "step": 1010 + }, + { + "epoch": 1.660477183700463, + "grad_norm": 0.6926931142807007, + "learning_rate": 1.1322975491282961e-05, + "loss": 0.0825, + "step": 1020 + }, + { + "epoch": 1.6686167777382104, + "eval_loss": 0.22156645357608795, + "eval_runtime": 34.8778, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 1025 + }, + { + "epoch": 1.6767563717759577, + "grad_norm": 0.41277509927749634, + "learning_rate": 1.1099647208755764e-05, + "loss": 0.0991, + "step": 1030 + }, + { + "epoch": 1.6930355598514524, + "grad_norm": 0.4389091730117798, + "learning_rate": 1.0876770725900265e-05, + "loss": 0.088, + "step": 1040 + }, + { + "epoch": 1.709314747926947, + "grad_norm": 0.48445749282836914, + "learning_rate": 1.0654417949968986e-05, + "loss": 0.1158, + "step": 1050 + }, + { + "epoch": 1.725593936002442, + "grad_norm": 0.6507833003997803, + "learning_rate": 1.0432660619249448e-05, + "loss": 0.1099, + "step": 1060 + }, + { + "epoch": 1.7418731240779366, + "grad_norm": 0.6933814883232117, + "learning_rate": 1.0211570279919044e-05, + "loss": 0.0757, + "step": 1070 + }, + { + "epoch": 1.7581523121534315, + "grad_norm": 0.7795721292495728, + "learning_rate": 9.991218262961901e-06, + "loss": 0.1017, + "step": 1080 + }, + { + "epoch": 1.7744315002289262, + "grad_norm": 0.594406008720398, + "learning_rate": 9.771675661155165e-06, + "loss": 0.1144, + "step": 1090 + }, + { + "epoch": 1.7907106883044208, + "grad_norm": 0.34790194034576416, + "learning_rate": 9.553013306132158e-06, + "loss": 0.0904, + "step": 1100 + }, + { + "epoch": 1.8069898763799155, + "grad_norm": 0.4349744915962219, + "learning_rate": 9.335301745529751e-06, + "loss": 0.1085, + "step": 1110 + }, + { + "epoch": 1.8232690644554101, + "grad_norm": 0.5773786306381226, + "learning_rate": 9.118611220227399e-06, + "loss": 0.1038, + "step": 1120 + }, + { + "epoch": 1.839548252530905, + "grad_norm": 0.4364662766456604, + "learning_rate": 8.903011641685128e-06, + "loss": 0.097, + "step": 1130 + }, + { + "epoch": 1.8558274406063997, + "grad_norm": 0.7753048539161682, + "learning_rate": 8.688572569387817e-06, + "loss": 0.1045, + "step": 1140 + }, + { + "epoch": 1.8721066286818946, + "grad_norm": 0.48441290855407715, + "learning_rate": 8.475363188403022e-06, + "loss": 0.095, + "step": 1150 + }, + { + "epoch": 1.8883858167573893, + "grad_norm": 0.6351140141487122, + "learning_rate": 8.263452287059607e-06, + "loss": 0.0977, + "step": 1160 + }, + { + "epoch": 1.904665004832884, + "grad_norm": 0.8837946057319641, + "learning_rate": 8.052908234754376e-06, + "loss": 0.0987, + "step": 1170 + }, + { + "epoch": 1.9209441929083786, + "grad_norm": 0.48196184635162354, + "learning_rate": 7.84379895989388e-06, + "loss": 0.088, + "step": 1180 + }, + { + "epoch": 1.9372233809838735, + "grad_norm": 0.5001464486122131, + "learning_rate": 7.636191927978465e-06, + "loss": 0.1161, + "step": 1190 + }, + { + "epoch": 1.9535025690593681, + "grad_norm": 0.6405985951423645, + "learning_rate": 7.430154119835716e-06, + "loss": 0.1023, + "step": 1200 + }, + { + "epoch": 1.969781757134863, + "grad_norm": 0.7047804594039917, + "learning_rate": 7.225752010010231e-06, + "loss": 0.1131, + "step": 1210 + }, + { + "epoch": 1.9860609452103577, + "grad_norm": 0.5221819281578064, + "learning_rate": 7.023051545316763e-06, + "loss": 0.0948, + "step": 1220 + }, + { + "epoch": 2.0023401332858524, + "grad_norm": 0.4171787202358246, + "learning_rate": 6.822118123563614e-06, + "loss": 0.0995, + "step": 1230 + }, + { + "epoch": 2.0023401332858524, + "eval_loss": 0.21631866693496704, + "eval_runtime": 34.8988, + "eval_samples_per_second": 5.416, + "eval_steps_per_second": 5.416, + "step": 1230 + }, + { + "epoch": 2.018619321361347, + "grad_norm": 0.7596387267112732, + "learning_rate": 6.623016572453172e-06, + "loss": 0.104, + "step": 1240 + }, + { + "epoch": 2.0348985094368417, + "grad_norm": 0.3702397346496582, + "learning_rate": 6.425811128666353e-06, + "loss": 0.0693, + "step": 1250 + }, + { + "epoch": 2.0511776975123364, + "grad_norm": 0.605099081993103, + "learning_rate": 6.230565417137758e-06, + "loss": 0.097, + "step": 1260 + }, + { + "epoch": 2.0674568855878315, + "grad_norm": 0.4555053412914276, + "learning_rate": 6.03734243052818e-06, + "loss": 0.0976, + "step": 1270 + }, + { + "epoch": 2.083736073663326, + "grad_norm": 0.7848448157310486, + "learning_rate": 5.8462045089011066e-06, + "loss": 0.1013, + "step": 1280 + }, + { + "epoch": 2.100015261738821, + "grad_norm": 0.6905212998390198, + "learning_rate": 5.657213319609776e-06, + "loss": 0.1094, + "step": 1290 + }, + { + "epoch": 2.1162944498143155, + "grad_norm": 0.5153264999389648, + "learning_rate": 5.4704298374012834e-06, + "loss": 0.0789, + "step": 1300 + }, + { + "epoch": 2.13257363788981, + "grad_norm": 0.8393344879150391, + "learning_rate": 5.2859143247441e-06, + "loss": 0.0904, + "step": 1310 + }, + { + "epoch": 2.148852825965305, + "grad_norm": 0.7440715432167053, + "learning_rate": 5.103726312385452e-06, + "loss": 0.0938, + "step": 1320 + }, + { + "epoch": 2.1651320140408, + "grad_norm": 0.8069117069244385, + "learning_rate": 4.923924580144743e-06, + "loss": 0.0908, + "step": 1330 + }, + { + "epoch": 2.1814112021162946, + "grad_norm": 0.5500065088272095, + "learning_rate": 4.746567137949261e-06, + "loss": 0.0976, + "step": 1340 + }, + { + "epoch": 2.1976903901917892, + "grad_norm": 0.51816725730896, + "learning_rate": 4.5717112071182715e-06, + "loss": 0.0889, + "step": 1350 + }, + { + "epoch": 2.213969578267284, + "grad_norm": 0.4226435124874115, + "learning_rate": 4.399413201901559e-06, + "loss": 0.0814, + "step": 1360 + }, + { + "epoch": 2.2302487663427786, + "grad_norm": 0.4923081398010254, + "learning_rate": 4.229728711278325e-06, + "loss": 0.086, + "step": 1370 + }, + { + "epoch": 2.2465279544182732, + "grad_norm": 0.5883035659790039, + "learning_rate": 4.062712481022371e-06, + "loss": 0.095, + "step": 1380 + }, + { + "epoch": 2.2628071424937684, + "grad_norm": 0.5114026069641113, + "learning_rate": 3.898418396039323e-06, + "loss": 0.1038, + "step": 1390 + }, + { + "epoch": 2.279086330569263, + "grad_norm": 0.5486142039299011, + "learning_rate": 3.7368994629815953e-06, + "loss": 0.0902, + "step": 1400 + }, + { + "epoch": 2.2953655186447577, + "grad_norm": 0.756912350654602, + "learning_rate": 3.5782077931467e-06, + "loss": 0.0706, + "step": 1410 + }, + { + "epoch": 2.3116447067202524, + "grad_norm": 0.6888672709465027, + "learning_rate": 3.42239458566444e-06, + "loss": 0.1065, + "step": 1420 + }, + { + "epoch": 2.327923894795747, + "grad_norm": 0.5472647547721863, + "learning_rate": 3.269510110978398e-06, + "loss": 0.0815, + "step": 1430 + }, + { + "epoch": 2.3360634888334944, + "eval_loss": 0.21516536176204681, + "eval_runtime": 34.891, + "eval_samples_per_second": 5.417, + "eval_steps_per_second": 5.417, + "step": 1435 + }, + { + "epoch": 2.3442030828712417, + "grad_norm": 0.5613276958465576, + "learning_rate": 3.119603694627042e-06, + "loss": 0.0923, + "step": 1440 + }, + { + "epoch": 2.3604822709467363, + "grad_norm": 0.8540468811988831, + "learning_rate": 2.9727237013296854e-06, + "loss": 0.1192, + "step": 1450 + }, + { + "epoch": 2.3767614590222315, + "grad_norm": 0.7269755005836487, + "learning_rate": 2.828917519382457e-06, + "loss": 0.0889, + "step": 1460 + }, + { + "epoch": 2.393040647097726, + "grad_norm": 0.6140917539596558, + "learning_rate": 2.6882315453692686e-06, + "loss": 0.0936, + "step": 1470 + }, + { + "epoch": 2.409319835173221, + "grad_norm": 0.4730454981327057, + "learning_rate": 2.550711169192775e-06, + "loss": 0.0976, + "step": 1480 + }, + { + "epoch": 2.4255990232487155, + "grad_norm": 0.5974939465522766, + "learning_rate": 2.4164007594300875e-06, + "loss": 0.0913, + "step": 1490 + }, + { + "epoch": 2.44187821132421, + "grad_norm": 0.6668256521224976, + "learning_rate": 2.2853436490180374e-06, + "loss": 0.0982, + "step": 1500 + }, + { + "epoch": 2.458157399399705, + "grad_norm": 0.6182997226715088, + "learning_rate": 2.1575821212725334e-06, + "loss": 0.0861, + "step": 1510 + }, + { + "epoch": 2.4744365874752, + "grad_norm": 0.5460255146026611, + "learning_rate": 2.0331573962465864e-06, + "loss": 0.086, + "step": 1520 + }, + { + "epoch": 2.4907157755506946, + "grad_norm": 0.6361858248710632, + "learning_rate": 1.912109617431372e-06, + "loss": 0.0911, + "step": 1530 + }, + { + "epoch": 2.5069949636261892, + "grad_norm": 0.8699812889099121, + "learning_rate": 1.7944778388046243e-06, + "loss": 0.0884, + "step": 1540 + }, + { + "epoch": 2.523274151701684, + "grad_norm": 0.5886068344116211, + "learning_rate": 1.680300012230543e-06, + "loss": 0.1027, + "step": 1550 + }, + { + "epoch": 2.5395533397771786, + "grad_norm": 0.6138848066329956, + "learning_rate": 1.5696129752152774e-06, + "loss": 0.0939, + "step": 1560 + }, + { + "epoch": 2.5558325278526732, + "grad_norm": 0.7268607020378113, + "learning_rate": 1.4624524390219455e-06, + "loss": 0.083, + "step": 1570 + }, + { + "epoch": 2.572111715928168, + "grad_norm": 0.619888961315155, + "learning_rate": 1.3588529771490054e-06, + "loss": 0.1087, + "step": 1580 + }, + { + "epoch": 2.5883909040036626, + "grad_norm": 0.5299406051635742, + "learning_rate": 1.2588480141757204e-06, + "loss": 0.0997, + "step": 1590 + }, + { + "epoch": 2.6046700920791577, + "grad_norm": 0.6051465272903442, + "learning_rate": 1.1624698149782842e-06, + "loss": 0.0953, + "step": 1600 + }, + { + "epoch": 2.6209492801546523, + "grad_norm": 0.6585546135902405, + "learning_rate": 1.0697494743201226e-06, + "loss": 0.1057, + "step": 1610 + }, + { + "epoch": 2.637228468230147, + "grad_norm": 0.5243381261825562, + "learning_rate": 9.807169068197008e-07, + "loss": 0.09, + "step": 1620 + }, + { + "epoch": 2.6535076563056417, + "grad_norm": 0.6636092066764832, + "learning_rate": 8.95400837299093e-07, + "loss": 0.061, + "step": 1630 + }, + { + "epoch": 2.6697868443811363, + "grad_norm": 0.6529124975204468, + "learning_rate": 8.138287915164078e-07, + "loss": 0.0897, + "step": 1640 + }, + { + "epoch": 2.6697868443811363, + "eval_loss": 0.21331782639026642, + "eval_runtime": 34.9348, + "eval_samples_per_second": 5.41, + "eval_steps_per_second": 5.41, + "step": 1640 + }, + { + "epoch": 2.6860660324566314, + "grad_norm": 0.7361763715744019, + "learning_rate": 7.360270872850808e-07, + "loss": 0.0983, + "step": 1650 + }, + { + "epoch": 2.702345220532126, + "grad_norm": 0.7820421457290649, + "learning_rate": 6.620208259828855e-07, + "loss": 0.0724, + "step": 1660 + }, + { + "epoch": 2.718624408607621, + "grad_norm": 0.47821661829948425, + "learning_rate": 5.918338844534077e-07, + "loss": 0.0906, + "step": 1670 + }, + { + "epoch": 2.7349035966831154, + "grad_norm": 0.5179721713066101, + "learning_rate": 5.25488907302589e-07, + "loss": 0.0851, + "step": 1680 + }, + { + "epoch": 2.75118278475861, + "grad_norm": 0.7704452872276306, + "learning_rate": 4.63007299592845e-07, + "loss": 0.0765, + "step": 1690 + }, + { + "epoch": 2.7674619728341048, + "grad_norm": 0.6302313208580017, + "learning_rate": 4.044092199370797e-07, + "loss": 0.093, + "step": 1700 + }, + { + "epoch": 2.7837411609095994, + "grad_norm": 0.43464457988739014, + "learning_rate": 3.497135739948657e-07, + "loss": 0.0949, + "step": 1710 + }, + { + "epoch": 2.800020348985094, + "grad_norm": 0.6571847796440125, + "learning_rate": 2.98938008372851e-07, + "loss": 0.0897, + "step": 1720 + }, + { + "epoch": 2.816299537060589, + "grad_norm": 0.542305052280426, + "learning_rate": 2.520989049313957e-07, + "loss": 0.0968, + "step": 1730 + }, + { + "epoch": 2.832578725136084, + "grad_norm": 0.5765232443809509, + "learning_rate": 2.0921137549923946e-07, + "loss": 0.0782, + "step": 1740 + }, + { + "epoch": 2.8488579132115786, + "grad_norm": 0.6098420023918152, + "learning_rate": 1.702892569979353e-07, + "loss": 0.0808, + "step": 1750 + }, + { + "epoch": 2.865137101287073, + "grad_norm": 0.5190752148628235, + "learning_rate": 1.353451069776024e-07, + "loss": 0.106, + "step": 1760 + }, + { + "epoch": 2.881416289362568, + "grad_norm": 0.5709157586097717, + "learning_rate": 1.0439019956544893e-07, + "loss": 0.104, + "step": 1770 + }, + { + "epoch": 2.897695477438063, + "grad_norm": 0.6572442054748535, + "learning_rate": 7.743452182837202e-08, + "loss": 0.1155, + "step": 1780 + }, + { + "epoch": 2.9139746655135577, + "grad_norm": 0.8765654563903809, + "learning_rate": 5.448677055080453e-08, + "loss": 0.1118, + "step": 1790 + }, + { + "epoch": 2.9302538535890523, + "grad_norm": 0.3849591910839081, + "learning_rate": 3.555434942884156e-08, + "loss": 0.097, + "step": 1800 + }, + { + "epoch": 2.946533041664547, + "grad_norm": 0.6078172922134399, + "learning_rate": 2.06433666815678e-08, + "loss": 0.0987, + "step": 1810 + }, + { + "epoch": 2.9628122297400417, + "grad_norm": 0.7132030129432678, + "learning_rate": 9.758633080352019e-09, + "loss": 0.0866, + "step": 1820 + }, + { + "epoch": 2.9790914178155363, + "grad_norm": 0.879240870475769, + "learning_rate": 2.903660396723351e-09, + "loss": 0.0863, + "step": 1830 + }, + { + "epoch": 2.995370605891031, + "grad_norm": 0.6857780814170837, + "learning_rate": 8.066026937064709e-11, + "loss": 0.1078, + "step": 1840 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.5737761155189965e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1842/training_args.bin b/checkpoint-1842/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-1842/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-205/README.md b/checkpoint-205/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-205/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-205/adapter_config.json b/checkpoint-205/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-205/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-205/adapter_model.safetensors b/checkpoint-205/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad524753021233e25187eec79f42993616b937dd --- /dev/null +++ b/checkpoint-205/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b16416f9c3fd79fbf7ff7d79b3309d1cd9c1f452ffafb4f7d9b22362b8a5627 +size 54560368 diff --git a/checkpoint-205/optimizer.pt b/checkpoint-205/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..67075a4ad1475a4185658fc0c6860aaac8cfbb81 --- /dev/null +++ b/checkpoint-205/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad4b8e6fd367128f87c2d1d03b7769b927275674d5f4496fb085ade75e2741f +size 109267450 diff --git a/checkpoint-205/rng_state.pth b/checkpoint-205/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..67bf3088a5e5a5d06bf66a58cdf2df5e4c4fea0b --- /dev/null +++ b/checkpoint-205/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f95d2a6e576bfe36c5e44d810d1e63ec9ae22fc4d77764995f161472ce3a2d1 +size 14244 diff --git a/checkpoint-205/scheduler.pt b/checkpoint-205/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fbca54621c207453e1ccf140feea5a73b10162b --- /dev/null +++ b/checkpoint-205/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2813c80d0e58a667bf2278e70213ccb80d9506378fe345713dc40e17380e846a +size 1064 diff --git a/checkpoint-205/special_tokens_map.json b/checkpoint-205/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-205/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-205/tokenizer.json b/checkpoint-205/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-205/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-205/tokenizer_config.json b/checkpoint-205/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-205/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-205/trainer_state.json b/checkpoint-205/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..32cf0b098cbb4bacb27e9b5fc8477df52887dca0 --- /dev/null +++ b/checkpoint-205/trainer_state.json @@ -0,0 +1,181 @@ +{ + "best_metric": 0.3089325428009033, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-205", + "epoch": 0.33372335554764204, + "eval_steps": 205, + "global_step": 205, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7404062066376704e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-205/training_args.bin b/checkpoint-205/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-205/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-410/README.md b/checkpoint-410/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-410/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-410/adapter_config.json b/checkpoint-410/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-410/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-410/adapter_model.safetensors b/checkpoint-410/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df55d3f53874e87dcdc71727f173d59622d4cc61 --- /dev/null +++ b/checkpoint-410/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832eea0ff7975ccbfdb1013a76b538b9ff00a1a77c7cdf4347facb7100ea9fec +size 54560368 diff --git a/checkpoint-410/optimizer.pt b/checkpoint-410/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f6620878c0a46c2f84ccaeb5db63316216ec07f --- /dev/null +++ b/checkpoint-410/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a5c882b589f545ff11610ef5b485db8929798da14c3292f667ffca4668d87c +size 109267450 diff --git a/checkpoint-410/rng_state.pth b/checkpoint-410/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f98d5cc3c9a26483445b6fceb8c645105f71780 --- /dev/null +++ b/checkpoint-410/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3d3e1a2edcba839b2964e0d06670eefd25844beb0a3181f581acf23ffb53817 +size 14244 diff --git a/checkpoint-410/scheduler.pt b/checkpoint-410/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..41f672ca898bd206b25ad6301f376caed016ffcb --- /dev/null +++ b/checkpoint-410/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8719299ffe92aa4226530a5856dfb90af8b2a6db389ebfa1a8de34a1a2545eb +size 1064 diff --git a/checkpoint-410/special_tokens_map.json b/checkpoint-410/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-410/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-410/tokenizer.json b/checkpoint-410/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-410/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-410/tokenizer_config.json b/checkpoint-410/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-410/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-410/trainer_state.json b/checkpoint-410/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..607805bec17b72bcdb212ba8c831c3a4dc029fbe --- /dev/null +++ b/checkpoint-410/trainer_state.json @@ -0,0 +1,336 @@ +{ + "best_metric": 0.26128318905830383, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-410", + "epoch": 0.6674467110952841, + "eval_steps": 205, + "global_step": 410, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.4947752742912e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-410/training_args.bin b/checkpoint-410/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-410/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-615/README.md b/checkpoint-615/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-615/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-615/adapter_config.json b/checkpoint-615/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-615/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-615/adapter_model.safetensors b/checkpoint-615/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f91d3241883317f882e76af8d86fe5c09297c9d5 --- /dev/null +++ b/checkpoint-615/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e463fbf416f1a63109f2da4c1df14ec4c32dd542db05ed8f232c6857bd348e7f +size 54560368 diff --git a/checkpoint-615/optimizer.pt b/checkpoint-615/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..193ecb2262f0a261c08d10df23ee8b85362aa0b5 --- /dev/null +++ b/checkpoint-615/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a616095607c4ace48b2086998beb5f00c1b8d99da695bb2ed96f6e898da881 +size 109267450 diff --git a/checkpoint-615/rng_state.pth b/checkpoint-615/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d8af3771cc476e1e59278e4d03a3b36ce943c0c --- /dev/null +++ b/checkpoint-615/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d05abe8d6b47f70ec0a908e7e722a9158004bf89bd33c1ba70f4efc5ab4844 +size 14244 diff --git a/checkpoint-615/scheduler.pt b/checkpoint-615/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..30869d1f520988ec9e9613b9d5687162a595be30 --- /dev/null +++ b/checkpoint-615/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:431170e274d3782b8bbc4c1733f3d986aca73cfad9304688e6598026258d0d9e +size 1064 diff --git a/checkpoint-615/special_tokens_map.json b/checkpoint-615/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-615/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-615/tokenizer.json b/checkpoint-615/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-615/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-615/tokenizer_config.json b/checkpoint-615/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-615/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-615/trainer_state.json b/checkpoint-615/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fb381f76f3cf8575df696f4b561018668e894030 --- /dev/null +++ b/checkpoint-615/trainer_state.json @@ -0,0 +1,484 @@ +{ + "best_metric": 0.2388339340686798, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-615", + "epoch": 1.0011700666429262, + "eval_steps": 205, + "global_step": 615, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.2542545541955584e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-615/training_args.bin b/checkpoint-615/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-615/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/checkpoint-820/README.md b/checkpoint-820/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-820/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-820/adapter_config.json b/checkpoint-820/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eda25bff7c128105e91aa1a00507aaa31dd53fd --- /dev/null +++ b/checkpoint-820/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-820/adapter_model.safetensors b/checkpoint-820/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d59579758552a21dc02865303e79bd62b302140 --- /dev/null +++ b/checkpoint-820/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7f523720546415c5c2d7c10ecbb396d32222863c20ca94cb0bad255b93373f +size 54560368 diff --git a/checkpoint-820/optimizer.pt b/checkpoint-820/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..19c1cd0671fc1d872cebdd85e38035d846b2d869 --- /dev/null +++ b/checkpoint-820/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:164419262892ac64f22b8f4a1e8aba062f37aa2853ea95ec736ddc4ee50996da +size 109267450 diff --git a/checkpoint-820/rng_state.pth b/checkpoint-820/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d05d01a4e083d81842bea686073e4b37800c574a --- /dev/null +++ b/checkpoint-820/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40af1dbd724031020c8eac1e0911f243a098962ec6f049c6ee4abefa4421c6f +size 14244 diff --git a/checkpoint-820/scheduler.pt b/checkpoint-820/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a75652808542c1cefd2addc616729de7f07bc0e6 --- /dev/null +++ b/checkpoint-820/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e33ef20318000cf2331515e23ba515acb8a2914baee960dae35c9f0fc512dc +size 1064 diff --git a/checkpoint-820/special_tokens_map.json b/checkpoint-820/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-820/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-820/tokenizer.json b/checkpoint-820/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-820/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-820/tokenizer_config.json b/checkpoint-820/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-820/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-820/trainer_state.json b/checkpoint-820/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e1bc05048e9c8d17fd3facc4e0696e16e66217ac --- /dev/null +++ b/checkpoint-820/trainer_state.json @@ -0,0 +1,639 @@ +{ + "best_metric": 0.22815725207328796, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-820", + "epoch": 1.3348934221905682, + "eval_steps": 205, + "global_step": 820, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + }, + { + "epoch": 1.0093096606806735, + "grad_norm": 0.5520356893539429, + "learning_rate": 1.980547431053456e-05, + "loss": 0.131, + "step": 620 + }, + { + "epoch": 1.0255888487561682, + "grad_norm": 0.6150078177452087, + "learning_rate": 1.9622115153168884e-05, + "loss": 0.1187, + "step": 630 + }, + { + "epoch": 1.041868036831663, + "grad_norm": 0.5100656151771545, + "learning_rate": 1.9436458168207117e-05, + "loss": 0.114, + "step": 640 + }, + { + "epoch": 1.0581472249071577, + "grad_norm": 0.5156052112579346, + "learning_rate": 1.9248563254672825e-05, + "loss": 0.1099, + "step": 650 + }, + { + "epoch": 1.0744264129826524, + "grad_norm": 0.4662775993347168, + "learning_rate": 1.9058491033618632e-05, + "loss": 0.1135, + "step": 660 + }, + { + "epoch": 1.0907056010581473, + "grad_norm": 0.4357255697250366, + "learning_rate": 1.886630282856787e-05, + "loss": 0.1036, + "step": 670 + }, + { + "epoch": 1.106984789133642, + "grad_norm": 0.3861764967441559, + "learning_rate": 1.867206064572962e-05, + "loss": 0.1145, + "step": 680 + }, + { + "epoch": 1.1232639772091366, + "grad_norm": 0.4562045633792877, + "learning_rate": 1.8475827153993447e-05, + "loss": 0.1107, + "step": 690 + }, + { + "epoch": 1.1395431652846315, + "grad_norm": 0.332917720079422, + "learning_rate": 1.8277665664710387e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 1.1558223533601262, + "grad_norm": 0.5971720814704895, + "learning_rate": 1.807764011126663e-05, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 1.1721015414356208, + "grad_norm": 0.6102172136306763, + "learning_rate": 1.787581502845651e-05, + "loss": 0.1046, + "step": 720 + }, + { + "epoch": 1.1883807295111157, + "grad_norm": 0.5294010043144226, + "learning_rate": 1.767225553166146e-05, + "loss": 0.1044, + "step": 730 + }, + { + "epoch": 1.2046599175866104, + "grad_norm": 0.5074148178100586, + "learning_rate": 1.7467027295841688e-05, + "loss": 0.1251, + "step": 740 + }, + { + "epoch": 1.220939105662105, + "grad_norm": 0.6349917650222778, + "learning_rate": 1.7260196534347235e-05, + "loss": 0.1037, + "step": 750 + }, + { + "epoch": 1.2372182937376, + "grad_norm": 0.34580153226852417, + "learning_rate": 1.7051829977555426e-05, + "loss": 0.0831, + "step": 760 + }, + { + "epoch": 1.2534974818130946, + "grad_norm": 0.4629954993724823, + "learning_rate": 1.684199485134144e-05, + "loss": 0.1068, + "step": 770 + }, + { + "epoch": 1.2697766698885893, + "grad_norm": 0.6406750082969666, + "learning_rate": 1.6630758855389055e-05, + "loss": 0.1192, + "step": 780 + }, + { + "epoch": 1.286055857964084, + "grad_norm": 0.4982251226902008, + "learning_rate": 1.6418190141348485e-05, + "loss": 0.123, + "step": 790 + }, + { + "epoch": 1.3023350460395788, + "grad_norm": 0.5146717429161072, + "learning_rate": 1.6204357290848464e-05, + "loss": 0.0831, + "step": 800 + }, + { + "epoch": 1.3186142341150735, + "grad_norm": 0.4735712707042694, + "learning_rate": 1.5989329293369538e-05, + "loss": 0.0971, + "step": 810 + }, + { + "epoch": 1.3348934221905682, + "grad_norm": 0.7393200397491455, + "learning_rate": 1.5773175523985818e-05, + "loss": 0.0923, + "step": 820 + }, + { + "epoch": 1.3348934221905682, + "eval_loss": 0.22815725207328796, + "eval_runtime": 34.8794, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 820 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.989312360723251e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-820/training_args.bin b/checkpoint-820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/checkpoint-820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/final/config.json b/final/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e31d2636403bbac2bc2c61346a86b5adf3b07145 --- /dev/null +++ b/final/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/final/generation_config.json b/final/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aba45c8e2bd7d66ff3a5e250c9e9233e137a752c --- /dev/null +++ b/final/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.46.3" +} diff --git a/final/model.safetensors.index.json b/final/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..a5349a46ed7d9079ac64dfbb750f494362a71f4e --- /dev/null +++ b/final/model.safetensors.index.json @@ -0,0 +1,554 @@ +{ + "metadata": { + "total_size": 16115048448 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/final/special_tokens_map.json b/final/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/final/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/final/tokenizer.json b/final/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/final/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/final/tokenizer_config.json b/final/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/final/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/plot_loss_from_trainer_state.py b/plot_loss_from_trainer_state.py new file mode 100644 index 0000000000000000000000000000000000000000..eaf442644d02d8625f8186aa6d050495712cd2f9 --- /dev/null +++ b/plot_loss_from_trainer_state.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +""" +Usage: + python plot_loss_from_trainer_state.py --input trainer_state.json --outdir ./plots \ + --checkpoint_steps 263,526,789,1052 + +功能: +- Curve: 黃橘色實線 +- Grid: x,y 虛線 +- Epoch markers: 藍色虛線 + EpochN 標籤(含最後一個 epoch) +- Checkpoints: 藍色小圓點(線性插值;超出範圍時使用端點值,並自動擴張 x 軸確保能看見) +""" +import json, argparse +from pathlib import Path +import matplotlib.pyplot as plt +import numpy as np + +YELLOW_ORANGE = "#d58f00" +BLUE = "#1f77b4" + +def find_epoch_boundaries(log_items): + """找到每個 epoch 邊界 (包含最後一個)""" + boundaries = [] + prev_epoch_int = None + seen = set() + last_step, last_epoch = None, None + for it in log_items: + step = it.get("step") + ep = it.get("epoch") + if step is None or ep is None: + continue + last_step, last_epoch = step, ep + ep_int = int(ep) + if prev_epoch_int is None: + prev_epoch_int = ep_int + continue + if ep_int != prev_epoch_int: + if (step, ep_int) not in seen and ep_int >= 1: + boundaries.append((step, ep_int)) + seen.add((step, ep_int)) + prev_epoch_int = ep_int + # 最後一個 epoch 也補上 + if last_step is not None and last_epoch is not None: + ep_final = int(float(last_epoch)) + 1 + if (last_step, ep_final) not in seen: + boundaries.append((last_step, ep_final)) + boundaries.sort(key=lambda x: x[0]) + return boundaries + +def plot_series(x, y, xlabel, ylabel, title, outpath, + epoch_marks=None, checkpoint_steps=None, + color=YELLOW_ORANGE, linestyle='-'): + fig = plt.figure(figsize=(10,6)) + ax = fig.add_subplot(111) + ax.plot(x, y, color=color, linestyle=linestyle, linewidth=2) + + # 標記 checkpoint 藍點(線性插值;邊界外使用端點值) + extra_x = [] + if checkpoint_steps: + for s in checkpoint_steps: + y_interp = np.interp(s, x, y, left=y[0], right=y[-1]) + ax.plot(s, y_interp, marker='o', color=BLUE, markersize=6) + extra_x.append(s) + + # === 計算 x 範圍時把 epoch 標線也納入,並加右側 padding === + xmin = 0 + all_x_candidates = [max(x)] + if extra_x: + all_x_candidates.append(max(extra_x)) + if epoch_marks: + # 把所有 epoch 標線的 step 納入考量 + ep_steps = [s for (s, _) in epoch_marks] + if ep_steps: + all_x_candidates.append(max(ep_steps)) + + xmax_base = max(all_x_candidates) if all_x_candidates else x[-1] + + # 右邊加一點 margin,避免剛好貼齊看不到線 + span = max(xmax_base - xmin, 1.0) + right_pad = max(1.0, 0.02 * span) # 至少 +1 step 或 2% 寬度 + ax.set_xlim(left=xmin, right=xmax_base + right_pad) + + # y 仍從 0 起 + ax.set_ylim(bottom=0) + + # 虛線格線 + ax.grid(True, which='major', axis='both', linestyle='--', linewidth=0.8, alpha=0.6) + + # epoch 標記 (藍色虛線) + if epoch_marks: + for step, ep in epoch_marks: + ax.axvline(x=step, color=BLUE, linestyle='--', linewidth=1.2) + ymax = ax.get_ylim()[1] + ax.text(step, ymax*0.98, f'Epoch{ep}', rotation=90, + va='top', ha='right', fontsize=8, color=BLUE) + + # label & look(放到最後避免被 set_xlim/set_ylim 影響) + ax.set_xlabel(xlabel); ax.set_ylabel(ylabel); ax.set_title(title) + ax.spines['left'].set_linewidth(2); ax.spines['bottom'].set_linewidth(2) + ax.spines['right'].set_visible(False); ax.spines['top'].set_visible(False) + + fig.savefig(outpath, bbox_inches="tight") + plt.close(fig) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--input", required=True, help="Path to trainer_state.json") + ap.add_argument("--outdir", default="./plots", help="Directory to save PNGs") + ap.add_argument("--no_epoch_marks", action="store_true", help="Disable vertical epoch markers") + ap.add_argument("--checkpoint_steps", default="", help="Comma-separated steps (e.g., 100,200,500)") + args = ap.parse_args() + + src = Path(args.input) + with open(src, "r", encoding="utf-8") as f: + state = json.load(f) + + log = state.get("log_history", state.get("logs", [])) + + steps, train_losses = [], [] + eval_steps, eval_losses = [], [] + lr_steps, lrs = [], [] + + for item in log: + step = item.get("step") + if step is None: + continue + if "loss" in item: + steps.append(step); train_losses.append(item["loss"]) + if "eval_loss" in item: + eval_steps.append(step); eval_losses.append(item["eval_loss"]) + if "learning_rate" in item: + lr_steps.append(step); lrs.append(item["learning_rate"]) + + outdir = Path(args.outdir); outdir.mkdir(parents=True, exist_ok=True) + + epoch_marks = None if args.no_epoch_marks else find_epoch_boundaries(log) + # 允許空白與混合格式 + raw = [s.strip() for s in args.checkpoint_steps.replace(",", ",").split(",") if s.strip()] + checkpoint_steps = [] + for s in raw: + try: + checkpoint_steps.append(int(float(s))) + except: + pass + + if steps and train_losses: + plot_series(steps, train_losses, "Step", "Training Loss", "Training Loss vs Step", + outdir / "loss_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + if eval_steps and eval_losses: + plot_series(eval_steps, eval_losses, "Step", "Eval Loss", "Eval Loss vs Step", + outdir / "eval_loss_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + if lr_steps and lrs: + plot_series(lr_steps, lrs, "Step", "Learning Rate", "Learning Rate vs Step", + outdir / "lr_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + + print(f"Saved plots to: {outdir.resolve()}") + +if __name__ == "__main__": + main() diff --git a/plots/.ipynb_checkpoints/loss_curve-checkpoint.png b/plots/.ipynb_checkpoints/loss_curve-checkpoint.png new file mode 100644 index 0000000000000000000000000000000000000000..e783683efc49a0ab1f3eab80791efe9a8cffa913 Binary files /dev/null and b/plots/.ipynb_checkpoints/loss_curve-checkpoint.png differ diff --git a/plots/eval_loss_curve.png b/plots/eval_loss_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..112bd60f0c6b420ac9dc244f8410e21e6b4857ac Binary files /dev/null and b/plots/eval_loss_curve.png differ diff --git a/plots/loss_curve.png b/plots/loss_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..e783683efc49a0ab1f3eab80791efe9a8cffa913 Binary files /dev/null and b/plots/loss_curve.png differ diff --git a/plots/lr_curve.png b/plots/lr_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..417bf3c67c8981ed8bda070bf4cf431b51a63594 Binary files /dev/null and b/plots/lr_curve.png differ diff --git a/runs/Oct15_18-23-48_pytorch-deployment-1978518505210822657-79b6d97bc-tl4dj/events.out.tfevents.1760552638.pytorch-deployment-1978518505210822657-79b6d97bc-tl4dj.2027.0 b/runs/Oct15_18-23-48_pytorch-deployment-1978518505210822657-79b6d97bc-tl4dj/events.out.tfevents.1760552638.pytorch-deployment-1978518505210822657-79b6d97bc-tl4dj.2027.0 new file mode 100644 index 0000000000000000000000000000000000000000..fd9fc908b4b8f60b49434ade93474c6403eb7826 --- /dev/null +++ b/runs/Oct15_18-23-48_pytorch-deployment-1978518505210822657-79b6d97bc-tl4dj/events.out.tfevents.1760552638.pytorch-deployment-1978518505210822657-79b6d97bc-tl4dj.2027.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c9870db7f3a1d7836595dff8a2f26859e02ef7c609fbb6559856596ccb754c +size 47025 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9eb2df5967291622bda2a55e7ef0214149b331d4 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1394 @@ +{ + "best_metric": 0.21331782639026642, + "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-1640", + "epoch": 2.9986264435061303, + "eval_steps": 205, + "global_step": 1842, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016279188075494735, + "grad_norm": 0.7572630643844604, + "learning_rate": 2.688172043010753e-06, + "loss": 0.5223, + "step": 10 + }, + { + "epoch": 0.03255837615098947, + "grad_norm": 0.417061984539032, + "learning_rate": 5.376344086021506e-06, + "loss": 0.4858, + "step": 20 + }, + { + "epoch": 0.048837564226484206, + "grad_norm": 0.3718095123767853, + "learning_rate": 8.064516129032258e-06, + "loss": 0.4246, + "step": 30 + }, + { + "epoch": 0.06511675230197894, + "grad_norm": 0.2949349582195282, + "learning_rate": 1.0752688172043012e-05, + "loss": 0.4405, + "step": 40 + }, + { + "epoch": 0.08139594037747368, + "grad_norm": 0.3159159719944, + "learning_rate": 1.3440860215053763e-05, + "loss": 0.4148, + "step": 50 + }, + { + "epoch": 0.09767512845296841, + "grad_norm": 0.4167034327983856, + "learning_rate": 1.6129032258064517e-05, + "loss": 0.3393, + "step": 60 + }, + { + "epoch": 0.11395431652846315, + "grad_norm": 0.39410400390625, + "learning_rate": 1.881720430107527e-05, + "loss": 0.2464, + "step": 70 + }, + { + "epoch": 0.13023350460395788, + "grad_norm": 0.3644021153450012, + "learning_rate": 2.1505376344086024e-05, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 0.1465126926794526, + "grad_norm": 0.30372634530067444, + "learning_rate": 2.4193548387096777e-05, + "loss": 0.2315, + "step": 90 + }, + { + "epoch": 0.16279188075494735, + "grad_norm": 0.2586315870285034, + "learning_rate": 2.4999011923655086e-05, + "loss": 0.1932, + "step": 100 + }, + { + "epoch": 0.17907106883044208, + "grad_norm": 0.37825971841812134, + "learning_rate": 2.4994172742085852e-05, + "loss": 0.2204, + "step": 110 + }, + { + "epoch": 0.19535025690593683, + "grad_norm": 0.21422357857227325, + "learning_rate": 2.4985302531208654e-05, + "loss": 0.1795, + "step": 120 + }, + { + "epoch": 0.21162944498143155, + "grad_norm": 0.2566869854927063, + "learning_rate": 2.4972404152844008e-05, + "loss": 0.1668, + "step": 130 + }, + { + "epoch": 0.2279086330569263, + "grad_norm": 0.28194501996040344, + "learning_rate": 2.49554817684312e-05, + "loss": 0.1476, + "step": 140 + }, + { + "epoch": 0.24418782113242102, + "grad_norm": 0.24139340221881866, + "learning_rate": 2.4934540837685647e-05, + "loss": 0.1609, + "step": 150 + }, + { + "epoch": 0.26046700920791577, + "grad_norm": 0.3306334614753723, + "learning_rate": 2.490958811683741e-05, + "loss": 0.1638, + "step": 160 + }, + { + "epoch": 0.2767461972834105, + "grad_norm": 0.27301114797592163, + "learning_rate": 2.4880631656451447e-05, + "loss": 0.1494, + "step": 170 + }, + { + "epoch": 0.2930253853589052, + "grad_norm": 0.34037259221076965, + "learning_rate": 2.484768079883018e-05, + "loss": 0.1534, + "step": 180 + }, + { + "epoch": 0.30930457343439993, + "grad_norm": 0.2306762933731079, + "learning_rate": 2.4810746174999418e-05, + "loss": 0.1749, + "step": 190 + }, + { + "epoch": 0.3255837615098947, + "grad_norm": 0.3183388113975525, + "learning_rate": 2.476983970127841e-05, + "loss": 0.1482, + "step": 200 + }, + { + "epoch": 0.33372335554764204, + "eval_loss": 0.3089325428009033, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 205 + }, + { + "epoch": 0.34186294958538943, + "grad_norm": 0.28704971075057983, + "learning_rate": 2.472497457543525e-05, + "loss": 0.1471, + "step": 210 + }, + { + "epoch": 0.35814213766088415, + "grad_norm": 0.2939195930957794, + "learning_rate": 2.4676165272428866e-05, + "loss": 0.1631, + "step": 220 + }, + { + "epoch": 0.3744213257363789, + "grad_norm": 0.31506845355033875, + "learning_rate": 2.4623427539738897e-05, + "loss": 0.1353, + "step": 230 + }, + { + "epoch": 0.39070051381187365, + "grad_norm": 0.3761660158634186, + "learning_rate": 2.456677839228506e-05, + "loss": 0.1716, + "step": 240 + }, + { + "epoch": 0.4069797018873684, + "grad_norm": 0.29187777638435364, + "learning_rate": 2.450623610693757e-05, + "loss": 0.1195, + "step": 250 + }, + { + "epoch": 0.4232588899628631, + "grad_norm": 0.46237581968307495, + "learning_rate": 2.4441820216620425e-05, + "loss": 0.1484, + "step": 260 + }, + { + "epoch": 0.4395380780383578, + "grad_norm": 0.4580917954444885, + "learning_rate": 2.437355150400945e-05, + "loss": 0.1009, + "step": 270 + }, + { + "epoch": 0.4558172661138526, + "grad_norm": 0.4181467890739441, + "learning_rate": 2.4301451994827112e-05, + "loss": 0.1376, + "step": 280 + }, + { + "epoch": 0.4720964541893473, + "grad_norm": 0.3629908561706543, + "learning_rate": 2.422554495073633e-05, + "loss": 0.1083, + "step": 290 + }, + { + "epoch": 0.48837564226484204, + "grad_norm": 0.4282682240009308, + "learning_rate": 2.4145854861835447e-05, + "loss": 0.1373, + "step": 300 + }, + { + "epoch": 0.5046548303403368, + "grad_norm": 0.4914080500602722, + "learning_rate": 2.406240743875699e-05, + "loss": 0.156, + "step": 310 + }, + { + "epoch": 0.5209340184158315, + "grad_norm": 0.3880573511123657, + "learning_rate": 2.3975229604372526e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.5372132064913262, + "grad_norm": 0.42599862813949585, + "learning_rate": 2.3884349485106477e-05, + "loss": 0.1338, + "step": 330 + }, + { + "epoch": 0.553492394566821, + "grad_norm": 0.4339046776294708, + "learning_rate": 2.378979640186163e-05, + "loss": 0.1368, + "step": 340 + }, + { + "epoch": 0.5697715826423158, + "grad_norm": 0.30713170766830444, + "learning_rate": 2.3691600860559222e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 0.5860507707178104, + "grad_norm": 0.4618566036224365, + "learning_rate": 2.3589794542296764e-05, + "loss": 0.1203, + "step": 360 + }, + { + "epoch": 0.6023299587933052, + "grad_norm": 0.40802672505378723, + "learning_rate": 2.3484410293126664e-05, + "loss": 0.1144, + "step": 370 + }, + { + "epoch": 0.6186091468687999, + "grad_norm": 0.5242702960968018, + "learning_rate": 2.3375482113459014e-05, + "loss": 0.1281, + "step": 380 + }, + { + "epoch": 0.6348883349442946, + "grad_norm": 0.4045926630496979, + "learning_rate": 2.3263045147091944e-05, + "loss": 0.1145, + "step": 390 + }, + { + "epoch": 0.6511675230197894, + "grad_norm": 0.5347346067428589, + "learning_rate": 2.3147135669873096e-05, + "loss": 0.1256, + "step": 400 + }, + { + "epoch": 0.6674467110952841, + "grad_norm": 0.4755608141422272, + "learning_rate": 2.302779107799583e-05, + "loss": 0.1251, + "step": 410 + }, + { + "epoch": 0.6674467110952841, + "eval_loss": 0.26128318905830383, + "eval_runtime": 34.9177, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 5.413, + "step": 410 + }, + { + "epoch": 0.6837258991707789, + "grad_norm": 0.4720211923122406, + "learning_rate": 2.290504987593399e-05, + "loss": 0.1399, + "step": 420 + }, + { + "epoch": 0.7000050872462736, + "grad_norm": 0.709035336971283, + "learning_rate": 2.2778951664019105e-05, + "loss": 0.1375, + "step": 430 + }, + { + "epoch": 0.7162842753217683, + "grad_norm": 0.534866213798523, + "learning_rate": 2.2649537125664034e-05, + "loss": 0.1125, + "step": 440 + }, + { + "epoch": 0.7325634633972631, + "grad_norm": 0.522056519985199, + "learning_rate": 2.2516848014237146e-05, + "loss": 0.0943, + "step": 450 + }, + { + "epoch": 0.7488426514727577, + "grad_norm": 0.2830965518951416, + "learning_rate": 2.238092713959133e-05, + "loss": 0.1248, + "step": 460 + }, + { + "epoch": 0.7651218395482525, + "grad_norm": 0.39431601762771606, + "learning_rate": 2.2241818354252113e-05, + "loss": 0.1248, + "step": 470 + }, + { + "epoch": 0.7814010276237473, + "grad_norm": 0.4821482002735138, + "learning_rate": 2.209956653926944e-05, + "loss": 0.1359, + "step": 480 + }, + { + "epoch": 0.797680215699242, + "grad_norm": 0.4956236481666565, + "learning_rate": 2.1954217589737535e-05, + "loss": 0.1232, + "step": 490 + }, + { + "epoch": 0.8139594037747367, + "grad_norm": 0.49444642663002014, + "learning_rate": 2.180581839998766e-05, + "loss": 0.1031, + "step": 500 + }, + { + "epoch": 0.8302385918502315, + "grad_norm": 0.3857091963291168, + "learning_rate": 2.165441684845847e-05, + "loss": 0.1023, + "step": 510 + }, + { + "epoch": 0.8465177799257262, + "grad_norm": 0.4830643832683563, + "learning_rate": 2.150006178224886e-05, + "loss": 0.1067, + "step": 520 + }, + { + "epoch": 0.862796968001221, + "grad_norm": 0.5119408965110779, + "learning_rate": 2.1342803001358278e-05, + "loss": 0.1209, + "step": 530 + }, + { + "epoch": 0.8790761560767156, + "grad_norm": 0.46363013982772827, + "learning_rate": 2.118269124261963e-05, + "loss": 0.1134, + "step": 540 + }, + { + "epoch": 0.8953553441522104, + "grad_norm": 0.42933255434036255, + "learning_rate": 2.1019778163329912e-05, + "loss": 0.1101, + "step": 550 + }, + { + "epoch": 0.9116345322277052, + "grad_norm": 0.5474070906639099, + "learning_rate": 2.0854116324583867e-05, + "loss": 0.1291, + "step": 560 + }, + { + "epoch": 0.9279137203031999, + "grad_norm": 0.43502509593963623, + "learning_rate": 2.0685759174316066e-05, + "loss": 0.0936, + "step": 570 + }, + { + "epoch": 0.9441929083786946, + "grad_norm": 0.632621169090271, + "learning_rate": 2.051476103005684e-05, + "loss": 0.1196, + "step": 580 + }, + { + "epoch": 0.9604720964541893, + "grad_norm": 0.553187906742096, + "learning_rate": 2.034117706140768e-05, + "loss": 0.1186, + "step": 590 + }, + { + "epoch": 0.9767512845296841, + "grad_norm": 0.48446330428123474, + "learning_rate": 2.0165063272241712e-05, + "loss": 0.1249, + "step": 600 + }, + { + "epoch": 0.9930304726051788, + "grad_norm": 0.47837090492248535, + "learning_rate": 1.9986476482635003e-05, + "loss": 0.1097, + "step": 610 + }, + { + "epoch": 1.0011700666429262, + "eval_loss": 0.2388339340686798, + "eval_runtime": 34.8769, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 615 + }, + { + "epoch": 1.0093096606806735, + "grad_norm": 0.5520356893539429, + "learning_rate": 1.980547431053456e-05, + "loss": 0.131, + "step": 620 + }, + { + "epoch": 1.0255888487561682, + "grad_norm": 0.6150078177452087, + "learning_rate": 1.9622115153168884e-05, + "loss": 0.1187, + "step": 630 + }, + { + "epoch": 1.041868036831663, + "grad_norm": 0.5100656151771545, + "learning_rate": 1.9436458168207117e-05, + "loss": 0.114, + "step": 640 + }, + { + "epoch": 1.0581472249071577, + "grad_norm": 0.5156052112579346, + "learning_rate": 1.9248563254672825e-05, + "loss": 0.1099, + "step": 650 + }, + { + "epoch": 1.0744264129826524, + "grad_norm": 0.4662775993347168, + "learning_rate": 1.9058491033618632e-05, + "loss": 0.1135, + "step": 660 + }, + { + "epoch": 1.0907056010581473, + "grad_norm": 0.4357255697250366, + "learning_rate": 1.886630282856787e-05, + "loss": 0.1036, + "step": 670 + }, + { + "epoch": 1.106984789133642, + "grad_norm": 0.3861764967441559, + "learning_rate": 1.867206064572962e-05, + "loss": 0.1145, + "step": 680 + }, + { + "epoch": 1.1232639772091366, + "grad_norm": 0.4562045633792877, + "learning_rate": 1.8475827153993447e-05, + "loss": 0.1107, + "step": 690 + }, + { + "epoch": 1.1395431652846315, + "grad_norm": 0.332917720079422, + "learning_rate": 1.8277665664710387e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 1.1558223533601262, + "grad_norm": 0.5971720814704895, + "learning_rate": 1.807764011126663e-05, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 1.1721015414356208, + "grad_norm": 0.6102172136306763, + "learning_rate": 1.787581502845651e-05, + "loss": 0.1046, + "step": 720 + }, + { + "epoch": 1.1883807295111157, + "grad_norm": 0.5294010043144226, + "learning_rate": 1.767225553166146e-05, + "loss": 0.1044, + "step": 730 + }, + { + "epoch": 1.2046599175866104, + "grad_norm": 0.5074148178100586, + "learning_rate": 1.7467027295841688e-05, + "loss": 0.1251, + "step": 740 + }, + { + "epoch": 1.220939105662105, + "grad_norm": 0.6349917650222778, + "learning_rate": 1.7260196534347235e-05, + "loss": 0.1037, + "step": 750 + }, + { + "epoch": 1.2372182937376, + "grad_norm": 0.34580153226852417, + "learning_rate": 1.7051829977555426e-05, + "loss": 0.0831, + "step": 760 + }, + { + "epoch": 1.2534974818130946, + "grad_norm": 0.4629954993724823, + "learning_rate": 1.684199485134144e-05, + "loss": 0.1068, + "step": 770 + }, + { + "epoch": 1.2697766698885893, + "grad_norm": 0.6406750082969666, + "learning_rate": 1.6630758855389055e-05, + "loss": 0.1192, + "step": 780 + }, + { + "epoch": 1.286055857964084, + "grad_norm": 0.4982251226902008, + "learning_rate": 1.6418190141348485e-05, + "loss": 0.123, + "step": 790 + }, + { + "epoch": 1.3023350460395788, + "grad_norm": 0.5146717429161072, + "learning_rate": 1.6204357290848464e-05, + "loss": 0.0831, + "step": 800 + }, + { + "epoch": 1.3186142341150735, + "grad_norm": 0.4735712707042694, + "learning_rate": 1.5989329293369538e-05, + "loss": 0.0971, + "step": 810 + }, + { + "epoch": 1.3348934221905682, + "grad_norm": 0.7393200397491455, + "learning_rate": 1.5773175523985818e-05, + "loss": 0.0923, + "step": 820 + }, + { + "epoch": 1.3348934221905682, + "eval_loss": 0.22815725207328796, + "eval_runtime": 34.8794, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 820 + }, + { + "epoch": 1.351172610266063, + "grad_norm": 0.8956180214881897, + "learning_rate": 1.5555965720982284e-05, + "loss": 0.0817, + "step": 830 + }, + { + "epoch": 1.3674517983415577, + "grad_norm": 0.7423743009567261, + "learning_rate": 1.533776996335497e-05, + "loss": 0.1178, + "step": 840 + }, + { + "epoch": 1.3837309864170524, + "grad_norm": 0.7034802436828613, + "learning_rate": 1.5118658648201145e-05, + "loss": 0.1289, + "step": 850 + }, + { + "epoch": 1.400010174492547, + "grad_norm": 0.48646238446235657, + "learning_rate": 1.4898702468006922e-05, + "loss": 0.0839, + "step": 860 + }, + { + "epoch": 1.416289362568042, + "grad_norm": 0.28704097867012024, + "learning_rate": 1.4677972387839548e-05, + "loss": 0.0974, + "step": 870 + }, + { + "epoch": 1.4325685506435366, + "grad_norm": 0.674045205116272, + "learning_rate": 1.4456539622451748e-05, + "loss": 0.1006, + "step": 880 + }, + { + "epoch": 1.4488477387190315, + "grad_norm": 0.3513787090778351, + "learning_rate": 1.4234475613305509e-05, + "loss": 0.1104, + "step": 890 + }, + { + "epoch": 1.4651269267945262, + "grad_norm": 0.8029477596282959, + "learning_rate": 1.4011852005522727e-05, + "loss": 0.1131, + "step": 900 + }, + { + "epoch": 1.4814061148700208, + "grad_norm": 0.5420731902122498, + "learning_rate": 1.378874062477015e-05, + "loss": 0.0943, + "step": 910 + }, + { + "epoch": 1.4976853029455155, + "grad_norm": 0.7574429512023926, + "learning_rate": 1.3565213454086048e-05, + "loss": 0.1234, + "step": 920 + }, + { + "epoch": 1.5139644910210102, + "grad_norm": 0.5867305994033813, + "learning_rate": 1.3341342610656157e-05, + "loss": 0.1036, + "step": 930 + }, + { + "epoch": 1.530243679096505, + "grad_norm": 0.47744086384773254, + "learning_rate": 1.311720032254629e-05, + "loss": 0.1082, + "step": 940 + }, + { + "epoch": 1.546522867172, + "grad_norm": 0.6975990533828735, + "learning_rate": 1.289285890539919e-05, + "loss": 0.0967, + "step": 950 + }, + { + "epoch": 1.5628020552474946, + "grad_norm": 0.7781053781509399, + "learning_rate": 1.2668390739103172e-05, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 1.5790812433229893, + "grad_norm": 0.5423984527587891, + "learning_rate": 1.2443868244439958e-05, + "loss": 0.1085, + "step": 970 + }, + { + "epoch": 1.595360431398484, + "grad_norm": 0.5535146594047546, + "learning_rate": 1.2219363859719392e-05, + "loss": 0.0942, + "step": 980 + }, + { + "epoch": 1.6116396194739786, + "grad_norm": 0.30531561374664307, + "learning_rate": 1.1994950017408451e-05, + "loss": 0.0944, + "step": 990 + }, + { + "epoch": 1.6279188075494735, + "grad_norm": 0.7325620055198669, + "learning_rate": 1.1770699120762161e-05, + "loss": 0.1126, + "step": 1000 + }, + { + "epoch": 1.6441979956249682, + "grad_norm": 1.1568708419799805, + "learning_rate": 1.1546683520463961e-05, + "loss": 0.1073, + "step": 1010 + }, + { + "epoch": 1.660477183700463, + "grad_norm": 0.6926931142807007, + "learning_rate": 1.1322975491282961e-05, + "loss": 0.0825, + "step": 1020 + }, + { + "epoch": 1.6686167777382104, + "eval_loss": 0.22156645357608795, + "eval_runtime": 34.8778, + "eval_samples_per_second": 5.419, + "eval_steps_per_second": 5.419, + "step": 1025 + }, + { + "epoch": 1.6767563717759577, + "grad_norm": 0.41277509927749634, + "learning_rate": 1.1099647208755764e-05, + "loss": 0.0991, + "step": 1030 + }, + { + "epoch": 1.6930355598514524, + "grad_norm": 0.4389091730117798, + "learning_rate": 1.0876770725900265e-05, + "loss": 0.088, + "step": 1040 + }, + { + "epoch": 1.709314747926947, + "grad_norm": 0.48445749282836914, + "learning_rate": 1.0654417949968986e-05, + "loss": 0.1158, + "step": 1050 + }, + { + "epoch": 1.725593936002442, + "grad_norm": 0.6507833003997803, + "learning_rate": 1.0432660619249448e-05, + "loss": 0.1099, + "step": 1060 + }, + { + "epoch": 1.7418731240779366, + "grad_norm": 0.6933814883232117, + "learning_rate": 1.0211570279919044e-05, + "loss": 0.0757, + "step": 1070 + }, + { + "epoch": 1.7581523121534315, + "grad_norm": 0.7795721292495728, + "learning_rate": 9.991218262961901e-06, + "loss": 0.1017, + "step": 1080 + }, + { + "epoch": 1.7744315002289262, + "grad_norm": 0.594406008720398, + "learning_rate": 9.771675661155165e-06, + "loss": 0.1144, + "step": 1090 + }, + { + "epoch": 1.7907106883044208, + "grad_norm": 0.34790194034576416, + "learning_rate": 9.553013306132158e-06, + "loss": 0.0904, + "step": 1100 + }, + { + "epoch": 1.8069898763799155, + "grad_norm": 0.4349744915962219, + "learning_rate": 9.335301745529751e-06, + "loss": 0.1085, + "step": 1110 + }, + { + "epoch": 1.8232690644554101, + "grad_norm": 0.5773786306381226, + "learning_rate": 9.118611220227399e-06, + "loss": 0.1038, + "step": 1120 + }, + { + "epoch": 1.839548252530905, + "grad_norm": 0.4364662766456604, + "learning_rate": 8.903011641685128e-06, + "loss": 0.097, + "step": 1130 + }, + { + "epoch": 1.8558274406063997, + "grad_norm": 0.7753048539161682, + "learning_rate": 8.688572569387817e-06, + "loss": 0.1045, + "step": 1140 + }, + { + "epoch": 1.8721066286818946, + "grad_norm": 0.48441290855407715, + "learning_rate": 8.475363188403022e-06, + "loss": 0.095, + "step": 1150 + }, + { + "epoch": 1.8883858167573893, + "grad_norm": 0.6351140141487122, + "learning_rate": 8.263452287059607e-06, + "loss": 0.0977, + "step": 1160 + }, + { + "epoch": 1.904665004832884, + "grad_norm": 0.8837946057319641, + "learning_rate": 8.052908234754376e-06, + "loss": 0.0987, + "step": 1170 + }, + { + "epoch": 1.9209441929083786, + "grad_norm": 0.48196184635162354, + "learning_rate": 7.84379895989388e-06, + "loss": 0.088, + "step": 1180 + }, + { + "epoch": 1.9372233809838735, + "grad_norm": 0.5001464486122131, + "learning_rate": 7.636191927978465e-06, + "loss": 0.1161, + "step": 1190 + }, + { + "epoch": 1.9535025690593681, + "grad_norm": 0.6405985951423645, + "learning_rate": 7.430154119835716e-06, + "loss": 0.1023, + "step": 1200 + }, + { + "epoch": 1.969781757134863, + "grad_norm": 0.7047804594039917, + "learning_rate": 7.225752010010231e-06, + "loss": 0.1131, + "step": 1210 + }, + { + "epoch": 1.9860609452103577, + "grad_norm": 0.5221819281578064, + "learning_rate": 7.023051545316763e-06, + "loss": 0.0948, + "step": 1220 + }, + { + "epoch": 2.0023401332858524, + "grad_norm": 0.4171787202358246, + "learning_rate": 6.822118123563614e-06, + "loss": 0.0995, + "step": 1230 + }, + { + "epoch": 2.0023401332858524, + "eval_loss": 0.21631866693496704, + "eval_runtime": 34.8988, + "eval_samples_per_second": 5.416, + "eval_steps_per_second": 5.416, + "step": 1230 + }, + { + "epoch": 2.018619321361347, + "grad_norm": 0.7596387267112732, + "learning_rate": 6.623016572453172e-06, + "loss": 0.104, + "step": 1240 + }, + { + "epoch": 2.0348985094368417, + "grad_norm": 0.3702397346496582, + "learning_rate": 6.425811128666353e-06, + "loss": 0.0693, + "step": 1250 + }, + { + "epoch": 2.0511776975123364, + "grad_norm": 0.605099081993103, + "learning_rate": 6.230565417137758e-06, + "loss": 0.097, + "step": 1260 + }, + { + "epoch": 2.0674568855878315, + "grad_norm": 0.4555053412914276, + "learning_rate": 6.03734243052818e-06, + "loss": 0.0976, + "step": 1270 + }, + { + "epoch": 2.083736073663326, + "grad_norm": 0.7848448157310486, + "learning_rate": 5.8462045089011066e-06, + "loss": 0.1013, + "step": 1280 + }, + { + "epoch": 2.100015261738821, + "grad_norm": 0.6905212998390198, + "learning_rate": 5.657213319609776e-06, + "loss": 0.1094, + "step": 1290 + }, + { + "epoch": 2.1162944498143155, + "grad_norm": 0.5153264999389648, + "learning_rate": 5.4704298374012834e-06, + "loss": 0.0789, + "step": 1300 + }, + { + "epoch": 2.13257363788981, + "grad_norm": 0.8393344879150391, + "learning_rate": 5.2859143247441e-06, + "loss": 0.0904, + "step": 1310 + }, + { + "epoch": 2.148852825965305, + "grad_norm": 0.7440715432167053, + "learning_rate": 5.103726312385452e-06, + "loss": 0.0938, + "step": 1320 + }, + { + "epoch": 2.1651320140408, + "grad_norm": 0.8069117069244385, + "learning_rate": 4.923924580144743e-06, + "loss": 0.0908, + "step": 1330 + }, + { + "epoch": 2.1814112021162946, + "grad_norm": 0.5500065088272095, + "learning_rate": 4.746567137949261e-06, + "loss": 0.0976, + "step": 1340 + }, + { + "epoch": 2.1976903901917892, + "grad_norm": 0.51816725730896, + "learning_rate": 4.5717112071182715e-06, + "loss": 0.0889, + "step": 1350 + }, + { + "epoch": 2.213969578267284, + "grad_norm": 0.4226435124874115, + "learning_rate": 4.399413201901559e-06, + "loss": 0.0814, + "step": 1360 + }, + { + "epoch": 2.2302487663427786, + "grad_norm": 0.4923081398010254, + "learning_rate": 4.229728711278325e-06, + "loss": 0.086, + "step": 1370 + }, + { + "epoch": 2.2465279544182732, + "grad_norm": 0.5883035659790039, + "learning_rate": 4.062712481022371e-06, + "loss": 0.095, + "step": 1380 + }, + { + "epoch": 2.2628071424937684, + "grad_norm": 0.5114026069641113, + "learning_rate": 3.898418396039323e-06, + "loss": 0.1038, + "step": 1390 + }, + { + "epoch": 2.279086330569263, + "grad_norm": 0.5486142039299011, + "learning_rate": 3.7368994629815953e-06, + "loss": 0.0902, + "step": 1400 + }, + { + "epoch": 2.2953655186447577, + "grad_norm": 0.756912350654602, + "learning_rate": 3.5782077931467e-06, + "loss": 0.0706, + "step": 1410 + }, + { + "epoch": 2.3116447067202524, + "grad_norm": 0.6888672709465027, + "learning_rate": 3.42239458566444e-06, + "loss": 0.1065, + "step": 1420 + }, + { + "epoch": 2.327923894795747, + "grad_norm": 0.5472647547721863, + "learning_rate": 3.269510110978398e-06, + "loss": 0.0815, + "step": 1430 + }, + { + "epoch": 2.3360634888334944, + "eval_loss": 0.21516536176204681, + "eval_runtime": 34.891, + "eval_samples_per_second": 5.417, + "eval_steps_per_second": 5.417, + "step": 1435 + }, + { + "epoch": 2.3442030828712417, + "grad_norm": 0.5613276958465576, + "learning_rate": 3.119603694627042e-06, + "loss": 0.0923, + "step": 1440 + }, + { + "epoch": 2.3604822709467363, + "grad_norm": 0.8540468811988831, + "learning_rate": 2.9727237013296854e-06, + "loss": 0.1192, + "step": 1450 + }, + { + "epoch": 2.3767614590222315, + "grad_norm": 0.7269755005836487, + "learning_rate": 2.828917519382457e-06, + "loss": 0.0889, + "step": 1460 + }, + { + "epoch": 2.393040647097726, + "grad_norm": 0.6140917539596558, + "learning_rate": 2.6882315453692686e-06, + "loss": 0.0936, + "step": 1470 + }, + { + "epoch": 2.409319835173221, + "grad_norm": 0.4730454981327057, + "learning_rate": 2.550711169192775e-06, + "loss": 0.0976, + "step": 1480 + }, + { + "epoch": 2.4255990232487155, + "grad_norm": 0.5974939465522766, + "learning_rate": 2.4164007594300875e-06, + "loss": 0.0913, + "step": 1490 + }, + { + "epoch": 2.44187821132421, + "grad_norm": 0.6668256521224976, + "learning_rate": 2.2853436490180374e-06, + "loss": 0.0982, + "step": 1500 + }, + { + "epoch": 2.458157399399705, + "grad_norm": 0.6182997226715088, + "learning_rate": 2.1575821212725334e-06, + "loss": 0.0861, + "step": 1510 + }, + { + "epoch": 2.4744365874752, + "grad_norm": 0.5460255146026611, + "learning_rate": 2.0331573962465864e-06, + "loss": 0.086, + "step": 1520 + }, + { + "epoch": 2.4907157755506946, + "grad_norm": 0.6361858248710632, + "learning_rate": 1.912109617431372e-06, + "loss": 0.0911, + "step": 1530 + }, + { + "epoch": 2.5069949636261892, + "grad_norm": 0.8699812889099121, + "learning_rate": 1.7944778388046243e-06, + "loss": 0.0884, + "step": 1540 + }, + { + "epoch": 2.523274151701684, + "grad_norm": 0.5886068344116211, + "learning_rate": 1.680300012230543e-06, + "loss": 0.1027, + "step": 1550 + }, + { + "epoch": 2.5395533397771786, + "grad_norm": 0.6138848066329956, + "learning_rate": 1.5696129752152774e-06, + "loss": 0.0939, + "step": 1560 + }, + { + "epoch": 2.5558325278526732, + "grad_norm": 0.7268607020378113, + "learning_rate": 1.4624524390219455e-06, + "loss": 0.083, + "step": 1570 + }, + { + "epoch": 2.572111715928168, + "grad_norm": 0.619888961315155, + "learning_rate": 1.3588529771490054e-06, + "loss": 0.1087, + "step": 1580 + }, + { + "epoch": 2.5883909040036626, + "grad_norm": 0.5299406051635742, + "learning_rate": 1.2588480141757204e-06, + "loss": 0.0997, + "step": 1590 + }, + { + "epoch": 2.6046700920791577, + "grad_norm": 0.6051465272903442, + "learning_rate": 1.1624698149782842e-06, + "loss": 0.0953, + "step": 1600 + }, + { + "epoch": 2.6209492801546523, + "grad_norm": 0.6585546135902405, + "learning_rate": 1.0697494743201226e-06, + "loss": 0.1057, + "step": 1610 + }, + { + "epoch": 2.637228468230147, + "grad_norm": 0.5243381261825562, + "learning_rate": 9.807169068197008e-07, + "loss": 0.09, + "step": 1620 + }, + { + "epoch": 2.6535076563056417, + "grad_norm": 0.6636092066764832, + "learning_rate": 8.95400837299093e-07, + "loss": 0.061, + "step": 1630 + }, + { + "epoch": 2.6697868443811363, + "grad_norm": 0.6529124975204468, + "learning_rate": 8.138287915164078e-07, + "loss": 0.0897, + "step": 1640 + }, + { + "epoch": 2.6697868443811363, + "eval_loss": 0.21331782639026642, + "eval_runtime": 34.9348, + "eval_samples_per_second": 5.41, + "eval_steps_per_second": 5.41, + "step": 1640 + }, + { + "epoch": 2.6860660324566314, + "grad_norm": 0.7361763715744019, + "learning_rate": 7.360270872850808e-07, + "loss": 0.0983, + "step": 1650 + }, + { + "epoch": 2.702345220532126, + "grad_norm": 0.7820421457290649, + "learning_rate": 6.620208259828855e-07, + "loss": 0.0724, + "step": 1660 + }, + { + "epoch": 2.718624408607621, + "grad_norm": 0.47821661829948425, + "learning_rate": 5.918338844534077e-07, + "loss": 0.0906, + "step": 1670 + }, + { + "epoch": 2.7349035966831154, + "grad_norm": 0.5179721713066101, + "learning_rate": 5.25488907302589e-07, + "loss": 0.0851, + "step": 1680 + }, + { + "epoch": 2.75118278475861, + "grad_norm": 0.7704452872276306, + "learning_rate": 4.63007299592845e-07, + "loss": 0.0765, + "step": 1690 + }, + { + "epoch": 2.7674619728341048, + "grad_norm": 0.6302313208580017, + "learning_rate": 4.044092199370797e-07, + "loss": 0.093, + "step": 1700 + }, + { + "epoch": 2.7837411609095994, + "grad_norm": 0.43464457988739014, + "learning_rate": 3.497135739948657e-07, + "loss": 0.0949, + "step": 1710 + }, + { + "epoch": 2.800020348985094, + "grad_norm": 0.6571847796440125, + "learning_rate": 2.98938008372851e-07, + "loss": 0.0897, + "step": 1720 + }, + { + "epoch": 2.816299537060589, + "grad_norm": 0.542305052280426, + "learning_rate": 2.520989049313957e-07, + "loss": 0.0968, + "step": 1730 + }, + { + "epoch": 2.832578725136084, + "grad_norm": 0.5765232443809509, + "learning_rate": 2.0921137549923946e-07, + "loss": 0.0782, + "step": 1740 + }, + { + "epoch": 2.8488579132115786, + "grad_norm": 0.6098420023918152, + "learning_rate": 1.702892569979353e-07, + "loss": 0.0808, + "step": 1750 + }, + { + "epoch": 2.865137101287073, + "grad_norm": 0.5190752148628235, + "learning_rate": 1.353451069776024e-07, + "loss": 0.106, + "step": 1760 + }, + { + "epoch": 2.881416289362568, + "grad_norm": 0.5709157586097717, + "learning_rate": 1.0439019956544893e-07, + "loss": 0.104, + "step": 1770 + }, + { + "epoch": 2.897695477438063, + "grad_norm": 0.6572442054748535, + "learning_rate": 7.743452182837202e-08, + "loss": 0.1155, + "step": 1780 + }, + { + "epoch": 2.9139746655135577, + "grad_norm": 0.8765654563903809, + "learning_rate": 5.448677055080453e-08, + "loss": 0.1118, + "step": 1790 + }, + { + "epoch": 2.9302538535890523, + "grad_norm": 0.3849591910839081, + "learning_rate": 3.555434942884156e-08, + "loss": 0.097, + "step": 1800 + }, + { + "epoch": 2.946533041664547, + "grad_norm": 0.6078172922134399, + "learning_rate": 2.06433666815678e-08, + "loss": 0.0987, + "step": 1810 + }, + { + "epoch": 2.9628122297400417, + "grad_norm": 0.7132030129432678, + "learning_rate": 9.758633080352019e-09, + "loss": 0.0866, + "step": 1820 + }, + { + "epoch": 2.9790914178155363, + "grad_norm": 0.879240870475769, + "learning_rate": 2.903660396723351e-09, + "loss": 0.0863, + "step": 1830 + }, + { + "epoch": 2.995370605891031, + "grad_norm": 0.6857780814170837, + "learning_rate": 8.066026937064709e-11, + "loss": 0.1078, + "step": 1840 + }, + { + "epoch": 2.9986264435061303, + "step": 1842, + "total_flos": 1.5737761155189965e+18, + "train_loss": 0.12212386991865083, + "train_runtime": 37466.128, + "train_samples_per_second": 1.574, + "train_steps_per_second": 0.049 + } + ], + "logging_steps": 10, + "max_steps": 1842, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 205, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.5737761155189965e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2245771e9af5d2e40208df60ece818e12dad2609 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba26e4087d6f0d4a22136ed77169fa2a327b3a4a3accd6168e4c2098fb9b7b6 +size 5624 diff --git a/training_meta.json b/training_meta.json new file mode 100644 index 0000000000000000000000000000000000000000..ce655c07b0b31ca53c385b4d59c62f2bf9f44e4a --- /dev/null +++ b/training_meta.json @@ -0,0 +1,25 @@ +{ + "num_samples_train": 19657, + "world_size": 1, + "effective_batch_size": 32, + "steps_per_epoch": 615, + "save_steps": 205, + "saves_per_epoch": 3, + "total_steps_est": 1845, + "approx_ckpts": 9, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "lora_r": 16, + "lora_alpha": 32, + "lora_dropout": 0.05, + "response_template": "<|start_header_id|>assistant<|end_header_id|>", + "use_max_len": 2560, + "label_all_assistant": true, + "skip_tool_only_assistant": false, + "assistant_tag": "<|start_header_id|>assistant<|end_header_id|>", + "tool_use_token": "<|use_tool|>" +} \ No newline at end of file