diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..d7ce9240890a566b0c0f6b067364af9ed62ffde2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1038/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1211/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1384/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1557/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-173/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-346/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-519/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-692/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-865/tokenizer.json filter=lfs diff=lfs merge=lfs -text +final/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..223aba4282e063aecda0254ee6413b4b905c1d65 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93077484e8ade289587dd40ae63ae2f049621fb9faa5943ab1a4ef383acafab +size 54560368 diff --git a/checkpoint-1038/README.md b/checkpoint-1038/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1038/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1038/adapter_config.json b/checkpoint-1038/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-1038/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1038/adapter_model.safetensors b/checkpoint-1038/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..074df0eb07a3b50ec9e06b560847e3384d0be13a --- /dev/null +++ b/checkpoint-1038/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc1801757186a6248ace10fa660a175d8b707c4047346c2cd683ac98ad7ee60 +size 54560368 diff --git a/checkpoint-1038/optimizer.pt b/checkpoint-1038/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..264aca87005ccfc60e1cee8478b10c6794dd133f --- /dev/null +++ b/checkpoint-1038/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8195ae732cbdee86398aa4352b03f34707a2041ce87d4e743d0f1a88833768d6 +size 109267450 diff --git a/checkpoint-1038/rng_state.pth b/checkpoint-1038/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a422597f2bb05a8baf1a584dca61ae79810e1d42 --- /dev/null +++ b/checkpoint-1038/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f53af96f0c3ddde7240c20590b783a896a1e2702a1c04b2963bed2341f6f586 +size 14244 diff --git a/checkpoint-1038/scheduler.pt b/checkpoint-1038/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7610fbeca023df22f9ab2679c3858362601a71bd --- /dev/null +++ b/checkpoint-1038/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aeff28b0a8d9a3e09a24e53d5cfb0100b23ecd0a9ba81e8f5d9af836a1c264e +size 1064 diff --git a/checkpoint-1038/special_tokens_map.json b/checkpoint-1038/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1038/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1038/tokenizer.json b/checkpoint-1038/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1038/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1038/tokenizer_config.json b/checkpoint-1038/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1038/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1038/trainer_state.json b/checkpoint-1038/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7db6a34ddd3bb3558f6fd3e1e988b59cfd54c3f9 --- /dev/null +++ b/checkpoint-1038/trainer_state.json @@ -0,0 +1,802 @@ +{ + "best_metric": 0.10069960355758667, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-1038", + "epoch": 1.995794027519077, + "eval_steps": 173, + "global_step": 1038, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + }, + { + "epoch": 1.3459111938953314, + "grad_norm": 0.5583875775337219, + "learning_rate": 1.5619915026250646e-05, + "loss": 0.1141, + "step": 700 + }, + { + "epoch": 1.3651384966652647, + "grad_norm": 0.5790243148803711, + "learning_rate": 1.536264051815491e-05, + "loss": 0.1326, + "step": 710 + }, + { + "epoch": 1.3843657994351979, + "grad_norm": 0.7467628121376038, + "learning_rate": 1.5104079675644706e-05, + "loss": 0.1439, + "step": 720 + }, + { + "epoch": 1.4035931022051313, + "grad_norm": 0.9867657423019409, + "learning_rate": 1.4844348683657616e-05, + "loss": 0.1385, + "step": 730 + }, + { + "epoch": 1.4228204049750646, + "grad_norm": 0.7909297347068787, + "learning_rate": 1.4583564252940735e-05, + "loss": 0.1259, + "step": 740 + }, + { + "epoch": 1.4420477077449978, + "grad_norm": 0.6159791350364685, + "learning_rate": 1.432184356760637e-05, + "loss": 0.1126, + "step": 750 + }, + { + "epoch": 1.4612750105149312, + "grad_norm": 0.6234619617462158, + "learning_rate": 1.4059304232475098e-05, + "loss": 0.1144, + "step": 760 + }, + { + "epoch": 1.4805023132848645, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.3796064220229765e-05, + "loss": 0.1249, + "step": 770 + }, + { + "epoch": 1.4997296160547977, + "grad_norm": 0.6258341073989868, + "learning_rate": 1.3532241818404156e-05, + "loss": 0.1321, + "step": 780 + }, + { + "epoch": 1.5189569188247312, + "grad_norm": 0.5723307728767395, + "learning_rate": 1.326795557623022e-05, + "loss": 0.1193, + "step": 790 + }, + { + "epoch": 1.5381842215946644, + "grad_norm": 0.7454131841659546, + "learning_rate": 1.300332425136769e-05, + "loss": 0.1281, + "step": 800 + }, + { + "epoch": 1.5574115243645976, + "grad_norm": 0.5975070595741272, + "learning_rate": 1.273846675654003e-05, + "loss": 0.1321, + "step": 810 + }, + { + "epoch": 1.576638827134531, + "grad_norm": 0.7056507468223572, + "learning_rate": 1.2473502106100723e-05, + "loss": 0.1444, + "step": 820 + }, + { + "epoch": 1.5958661299044643, + "grad_norm": 0.7889280915260315, + "learning_rate": 1.2208549362553885e-05, + "loss": 0.1226, + "step": 830 + }, + { + "epoch": 1.6150934326743975, + "grad_norm": 0.7041313648223877, + "learning_rate": 1.194372758305325e-05, + "loss": 0.1316, + "step": 840 + }, + { + "epoch": 1.634320735444331, + "grad_norm": 0.7797935605049133, + "learning_rate": 1.1679155765903524e-05, + "loss": 0.132, + "step": 850 + }, + { + "epoch": 1.6535480382142642, + "grad_norm": 0.6426231861114502, + "learning_rate": 1.1414952797088248e-05, + "loss": 0.1101, + "step": 860 + }, + { + "epoch": 1.663161689599231, + "eval_loss": 0.10293085128068924, + "eval_runtime": 203.1567, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 5.277, + "step": 865 + }, + { + "epoch": 1.6727753409841974, + "grad_norm": 1.0461760759353638, + "learning_rate": 1.1151237396848058e-05, + "loss": 0.128, + "step": 870 + }, + { + "epoch": 1.692002643754131, + "grad_norm": 0.8692240118980408, + "learning_rate": 1.088812806633349e-05, + "loss": 0.1114, + "step": 880 + }, + { + "epoch": 1.7112299465240641, + "grad_norm": 0.5583866238594055, + "learning_rate": 1.0625743034356183e-05, + "loss": 0.1309, + "step": 890 + }, + { + "epoch": 1.7304572492939974, + "grad_norm": 0.5476118922233582, + "learning_rate": 1.0364200204262473e-05, + "loss": 0.1156, + "step": 900 + }, + { + "epoch": 1.7496845520639308, + "grad_norm": 0.8960713148117065, + "learning_rate": 1.0103617100953274e-05, + "loss": 0.1305, + "step": 910 + }, + { + "epoch": 1.768911854833864, + "grad_norm": 0.6927953958511353, + "learning_rate": 9.84411081807393e-06, + "loss": 0.1245, + "step": 920 + }, + { + "epoch": 1.7881391576037973, + "grad_norm": 0.5891989469528198, + "learning_rate": 9.585797965397949e-06, + "loss": 0.1125, + "step": 930 + }, + { + "epoch": 1.8073664603737307, + "grad_norm": 0.8319947123527527, + "learning_rate": 9.328794616428092e-06, + "loss": 0.1462, + "step": 940 + }, + { + "epoch": 1.826593763143664, + "grad_norm": 0.7439499497413635, + "learning_rate": 9.073216256238485e-06, + "loss": 0.1167, + "step": 950 + }, + { + "epoch": 1.8458210659135972, + "grad_norm": 0.7593638896942139, + "learning_rate": 8.8191777295811e-06, + "loss": 0.1356, + "step": 960 + }, + { + "epoch": 1.8650483686835306, + "grad_norm": 0.871376097202301, + "learning_rate": 8.56679318928e-06, + "loss": 0.1173, + "step": 970 + }, + { + "epoch": 1.884275671453464, + "grad_norm": 0.8772872090339661, + "learning_rate": 8.31617604493651e-06, + "loss": 0.1347, + "step": 980 + }, + { + "epoch": 1.903502974223397, + "grad_norm": 0.6309168934822083, + "learning_rate": 8.067438911968305e-06, + "loss": 0.1382, + "step": 990 + }, + { + "epoch": 1.9227302769933305, + "grad_norm": 0.775113046169281, + "learning_rate": 7.820693561005429e-06, + "loss": 0.1368, + "step": 1000 + }, + { + "epoch": 1.941957579763264, + "grad_norm": 0.9096739888191223, + "learning_rate": 7.576050867665876e-06, + "loss": 0.1263, + "step": 1010 + }, + { + "epoch": 1.961184882533197, + "grad_norm": 0.7637848258018494, + "learning_rate": 7.333620762733376e-06, + "loss": 0.1148, + "step": 1020 + }, + { + "epoch": 1.9804121853031305, + "grad_norm": 0.8084997534751892, + "learning_rate": 7.0935121827597245e-06, + "loss": 0.1457, + "step": 1030 + }, + { + "epoch": 1.995794027519077, + "eval_loss": 0.10069960355758667, + "eval_runtime": 203.0573, + "eval_samples_per_second": 5.279, + "eval_steps_per_second": 5.279, + "step": 1038 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.065700587225088e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1038/training_args.bin b/checkpoint-1038/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-1038/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-1211/README.md b/checkpoint-1211/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1211/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1211/adapter_config.json b/checkpoint-1211/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-1211/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1211/adapter_model.safetensors b/checkpoint-1211/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc999c068c113b82c64605771fb60b200d314c95 --- /dev/null +++ b/checkpoint-1211/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5319ccdfef706acfac4a219adbf98ea5cf67bc454e78bb1dc2cbbfdcc3e0e015 +size 54560368 diff --git a/checkpoint-1211/optimizer.pt b/checkpoint-1211/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..514b23d8ecd974cb108ad0da690c7e336c480ad8 --- /dev/null +++ b/checkpoint-1211/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030255b7d15cff589355b07ea7f96b007e27b27e549bed5a51496dfb489d7393 +size 109267450 diff --git a/checkpoint-1211/rng_state.pth b/checkpoint-1211/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..832755ee759717bd5938158287bcc32ae5346389 --- /dev/null +++ b/checkpoint-1211/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25611d577bcb0df3af17e0256ad2cc986038e18c4e8c84c4b9bb7b97ce965e43 +size 14244 diff --git a/checkpoint-1211/scheduler.pt b/checkpoint-1211/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..889acd9667c0678d67711e6ce2471f57feff3c2f --- /dev/null +++ b/checkpoint-1211/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baaf4f9f8f7dc20d207905f167b1322b562dee22187054077907360642d35010 +size 1064 diff --git a/checkpoint-1211/special_tokens_map.json b/checkpoint-1211/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1211/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1211/tokenizer.json b/checkpoint-1211/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1211/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1211/tokenizer_config.json b/checkpoint-1211/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1211/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1211/trainer_state.json b/checkpoint-1211/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a830fa1cd9b589a86340bc567de03f02f4890243 --- /dev/null +++ b/checkpoint-1211/trainer_state.json @@ -0,0 +1,936 @@ +{ + "best_metric": 0.10013294219970703, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-1211", + "epoch": 2.3284263654389235, + "eval_steps": 173, + "global_step": 1211, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + }, + { + "epoch": 1.3459111938953314, + "grad_norm": 0.5583875775337219, + "learning_rate": 1.5619915026250646e-05, + "loss": 0.1141, + "step": 700 + }, + { + "epoch": 1.3651384966652647, + "grad_norm": 0.5790243148803711, + "learning_rate": 1.536264051815491e-05, + "loss": 0.1326, + "step": 710 + }, + { + "epoch": 1.3843657994351979, + "grad_norm": 0.7467628121376038, + "learning_rate": 1.5104079675644706e-05, + "loss": 0.1439, + "step": 720 + }, + { + "epoch": 1.4035931022051313, + "grad_norm": 0.9867657423019409, + "learning_rate": 1.4844348683657616e-05, + "loss": 0.1385, + "step": 730 + }, + { + "epoch": 1.4228204049750646, + "grad_norm": 0.7909297347068787, + "learning_rate": 1.4583564252940735e-05, + "loss": 0.1259, + "step": 740 + }, + { + "epoch": 1.4420477077449978, + "grad_norm": 0.6159791350364685, + "learning_rate": 1.432184356760637e-05, + "loss": 0.1126, + "step": 750 + }, + { + "epoch": 1.4612750105149312, + "grad_norm": 0.6234619617462158, + "learning_rate": 1.4059304232475098e-05, + "loss": 0.1144, + "step": 760 + }, + { + "epoch": 1.4805023132848645, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.3796064220229765e-05, + "loss": 0.1249, + "step": 770 + }, + { + "epoch": 1.4997296160547977, + "grad_norm": 0.6258341073989868, + "learning_rate": 1.3532241818404156e-05, + "loss": 0.1321, + "step": 780 + }, + { + "epoch": 1.5189569188247312, + "grad_norm": 0.5723307728767395, + "learning_rate": 1.326795557623022e-05, + "loss": 0.1193, + "step": 790 + }, + { + "epoch": 1.5381842215946644, + "grad_norm": 0.7454131841659546, + "learning_rate": 1.300332425136769e-05, + "loss": 0.1281, + "step": 800 + }, + { + "epoch": 1.5574115243645976, + "grad_norm": 0.5975070595741272, + "learning_rate": 1.273846675654003e-05, + "loss": 0.1321, + "step": 810 + }, + { + "epoch": 1.576638827134531, + "grad_norm": 0.7056507468223572, + "learning_rate": 1.2473502106100723e-05, + "loss": 0.1444, + "step": 820 + }, + { + "epoch": 1.5958661299044643, + "grad_norm": 0.7889280915260315, + "learning_rate": 1.2208549362553885e-05, + "loss": 0.1226, + "step": 830 + }, + { + "epoch": 1.6150934326743975, + "grad_norm": 0.7041313648223877, + "learning_rate": 1.194372758305325e-05, + "loss": 0.1316, + "step": 840 + }, + { + "epoch": 1.634320735444331, + "grad_norm": 0.7797935605049133, + "learning_rate": 1.1679155765903524e-05, + "loss": 0.132, + "step": 850 + }, + { + "epoch": 1.6535480382142642, + "grad_norm": 0.6426231861114502, + "learning_rate": 1.1414952797088248e-05, + "loss": 0.1101, + "step": 860 + }, + { + "epoch": 1.663161689599231, + "eval_loss": 0.10293085128068924, + "eval_runtime": 203.1567, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 5.277, + "step": 865 + }, + { + "epoch": 1.6727753409841974, + "grad_norm": 1.0461760759353638, + "learning_rate": 1.1151237396848058e-05, + "loss": 0.128, + "step": 870 + }, + { + "epoch": 1.692002643754131, + "grad_norm": 0.8692240118980408, + "learning_rate": 1.088812806633349e-05, + "loss": 0.1114, + "step": 880 + }, + { + "epoch": 1.7112299465240641, + "grad_norm": 0.5583866238594055, + "learning_rate": 1.0625743034356183e-05, + "loss": 0.1309, + "step": 890 + }, + { + "epoch": 1.7304572492939974, + "grad_norm": 0.5476118922233582, + "learning_rate": 1.0364200204262473e-05, + "loss": 0.1156, + "step": 900 + }, + { + "epoch": 1.7496845520639308, + "grad_norm": 0.8960713148117065, + "learning_rate": 1.0103617100953274e-05, + "loss": 0.1305, + "step": 910 + }, + { + "epoch": 1.768911854833864, + "grad_norm": 0.6927953958511353, + "learning_rate": 9.84411081807393e-06, + "loss": 0.1245, + "step": 920 + }, + { + "epoch": 1.7881391576037973, + "grad_norm": 0.5891989469528198, + "learning_rate": 9.585797965397949e-06, + "loss": 0.1125, + "step": 930 + }, + { + "epoch": 1.8073664603737307, + "grad_norm": 0.8319947123527527, + "learning_rate": 9.328794616428092e-06, + "loss": 0.1462, + "step": 940 + }, + { + "epoch": 1.826593763143664, + "grad_norm": 0.7439499497413635, + "learning_rate": 9.073216256238485e-06, + "loss": 0.1167, + "step": 950 + }, + { + "epoch": 1.8458210659135972, + "grad_norm": 0.7593638896942139, + "learning_rate": 8.8191777295811e-06, + "loss": 0.1356, + "step": 960 + }, + { + "epoch": 1.8650483686835306, + "grad_norm": 0.871376097202301, + "learning_rate": 8.56679318928e-06, + "loss": 0.1173, + "step": 970 + }, + { + "epoch": 1.884275671453464, + "grad_norm": 0.8772872090339661, + "learning_rate": 8.31617604493651e-06, + "loss": 0.1347, + "step": 980 + }, + { + "epoch": 1.903502974223397, + "grad_norm": 0.6309168934822083, + "learning_rate": 8.067438911968305e-06, + "loss": 0.1382, + "step": 990 + }, + { + "epoch": 1.9227302769933305, + "grad_norm": 0.775113046169281, + "learning_rate": 7.820693561005429e-06, + "loss": 0.1368, + "step": 1000 + }, + { + "epoch": 1.941957579763264, + "grad_norm": 0.9096739888191223, + "learning_rate": 7.576050867665876e-06, + "loss": 0.1263, + "step": 1010 + }, + { + "epoch": 1.961184882533197, + "grad_norm": 0.7637848258018494, + "learning_rate": 7.333620762733376e-06, + "loss": 0.1148, + "step": 1020 + }, + { + "epoch": 1.9804121853031305, + "grad_norm": 0.8084997534751892, + "learning_rate": 7.0935121827597245e-06, + "loss": 0.1457, + "step": 1030 + }, + { + "epoch": 1.995794027519077, + "eval_loss": 0.10069960355758667, + "eval_runtime": 203.0573, + "eval_samples_per_second": 5.279, + "eval_steps_per_second": 5.279, + "step": 1038 + }, + { + "epoch": 1.999639488073064, + "grad_norm": 1.0884274244308472, + "learning_rate": 6.855833021113886e-06, + "loss": 0.1641, + "step": 1040 + }, + { + "epoch": 2.018866790842997, + "grad_norm": 0.702237069606781, + "learning_rate": 6.620690079499835e-06, + "loss": 0.1159, + "step": 1050 + }, + { + "epoch": 2.0380940936129304, + "grad_norm": 0.6377178430557251, + "learning_rate": 6.388189019964976e-06, + "loss": 0.1103, + "step": 1060 + }, + { + "epoch": 2.057321396382864, + "grad_norm": 0.8843504786491394, + "learning_rate": 6.158434317420636e-06, + "loss": 0.1178, + "step": 1070 + }, + { + "epoch": 2.076548699152797, + "grad_norm": 0.42746174335479736, + "learning_rate": 5.931529212695996e-06, + "loss": 0.1143, + "step": 1080 + }, + { + "epoch": 2.0957760019227303, + "grad_norm": 0.7449749708175659, + "learning_rate": 5.70757566614661e-06, + "loss": 0.1262, + "step": 1090 + }, + { + "epoch": 2.1150033046926637, + "grad_norm": 0.6538805961608887, + "learning_rate": 5.48667431183824e-06, + "loss": 0.1344, + "step": 1100 + }, + { + "epoch": 2.1342306074625967, + "grad_norm": 0.8034993410110474, + "learning_rate": 5.268924412326709e-06, + "loss": 0.1447, + "step": 1110 + }, + { + "epoch": 2.15345791023253, + "grad_norm": 0.7438477277755737, + "learning_rate": 5.054423814054049e-06, + "loss": 0.1082, + "step": 1120 + }, + { + "epoch": 2.1726852130024636, + "grad_norm": 0.5646623373031616, + "learning_rate": 4.843268903380932e-06, + "loss": 0.1199, + "step": 1130 + }, + { + "epoch": 2.1919125157723967, + "grad_norm": 0.9965047240257263, + "learning_rate": 4.6355545632752575e-06, + "loss": 0.1303, + "step": 1140 + }, + { + "epoch": 2.21113981854233, + "grad_norm": 0.8709131479263306, + "learning_rate": 4.4313741306762495e-06, + "loss": 0.1107, + "step": 1150 + }, + { + "epoch": 2.2303671213122636, + "grad_norm": 0.6653530597686768, + "learning_rate": 4.230819354553279e-06, + "loss": 0.1053, + "step": 1160 + }, + { + "epoch": 2.2495944240821966, + "grad_norm": 0.766173243522644, + "learning_rate": 4.033980354678239e-06, + "loss": 0.1017, + "step": 1170 + }, + { + "epoch": 2.26882172685213, + "grad_norm": 0.5112572312355042, + "learning_rate": 3.840945581130008e-06, + "loss": 0.109, + "step": 1180 + }, + { + "epoch": 2.2880490296220635, + "grad_norm": 0.8744060397148132, + "learning_rate": 3.651801774549213e-06, + "loss": 0.1026, + "step": 1190 + }, + { + "epoch": 2.3072763323919965, + "grad_norm": 0.8215727806091309, + "learning_rate": 3.4666339271610836e-06, + "loss": 0.1058, + "step": 1200 + }, + { + "epoch": 2.32650363516193, + "grad_norm": 0.6597920656204224, + "learning_rate": 3.285525244584017e-06, + "loss": 0.1378, + "step": 1210 + }, + { + "epoch": 2.3284263654389235, + "eval_loss": 0.10013294219970703, + "eval_runtime": 203.5302, + "eval_samples_per_second": 5.267, + "eval_steps_per_second": 5.267, + "step": 1211 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0590178672823501e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1211/training_args.bin b/checkpoint-1211/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-1211/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-1384/README.md b/checkpoint-1384/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1384/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1384/adapter_config.json b/checkpoint-1384/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-1384/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1384/adapter_model.safetensors b/checkpoint-1384/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..223aba4282e063aecda0254ee6413b4b905c1d65 --- /dev/null +++ b/checkpoint-1384/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93077484e8ade289587dd40ae63ae2f049621fb9faa5943ab1a4ef383acafab +size 54560368 diff --git a/checkpoint-1384/optimizer.pt b/checkpoint-1384/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8248bb2508ed397c1e8d71f9d5c2a89aaaf50833 --- /dev/null +++ b/checkpoint-1384/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf49105f713928ee481fb179ea59a37a7bf77f976d94d4d57f52bcdd055da460 +size 109267450 diff --git a/checkpoint-1384/rng_state.pth b/checkpoint-1384/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b108efb3c4090eee0f3f064e82a13982b9d9de3 --- /dev/null +++ b/checkpoint-1384/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbdf004a5be6e40de48c5be07c6f5f191d2dd555256356c4d666fb7fbef9b409 +size 14244 diff --git a/checkpoint-1384/scheduler.pt b/checkpoint-1384/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..df3e2815192ecd78a96f55b3aa37cee14b1332f1 --- /dev/null +++ b/checkpoint-1384/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe5f11da2c9345b936c863ab8075f2299a710197bc2a0d79f2151a50fad923b +size 1064 diff --git a/checkpoint-1384/special_tokens_map.json b/checkpoint-1384/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1384/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1384/tokenizer.json b/checkpoint-1384/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1384/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1384/tokenizer_config.json b/checkpoint-1384/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1384/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1384/trainer_state.json b/checkpoint-1384/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3aa10d893d9909d192df12a79956da6c569a2667 --- /dev/null +++ b/checkpoint-1384/trainer_state.json @@ -0,0 +1,1063 @@ +{ + "best_metric": 0.09926149994134903, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-1384", + "epoch": 2.6610587033587696, + "eval_steps": 173, + "global_step": 1384, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + }, + { + "epoch": 1.3459111938953314, + "grad_norm": 0.5583875775337219, + "learning_rate": 1.5619915026250646e-05, + "loss": 0.1141, + "step": 700 + }, + { + "epoch": 1.3651384966652647, + "grad_norm": 0.5790243148803711, + "learning_rate": 1.536264051815491e-05, + "loss": 0.1326, + "step": 710 + }, + { + "epoch": 1.3843657994351979, + "grad_norm": 0.7467628121376038, + "learning_rate": 1.5104079675644706e-05, + "loss": 0.1439, + "step": 720 + }, + { + "epoch": 1.4035931022051313, + "grad_norm": 0.9867657423019409, + "learning_rate": 1.4844348683657616e-05, + "loss": 0.1385, + "step": 730 + }, + { + "epoch": 1.4228204049750646, + "grad_norm": 0.7909297347068787, + "learning_rate": 1.4583564252940735e-05, + "loss": 0.1259, + "step": 740 + }, + { + "epoch": 1.4420477077449978, + "grad_norm": 0.6159791350364685, + "learning_rate": 1.432184356760637e-05, + "loss": 0.1126, + "step": 750 + }, + { + "epoch": 1.4612750105149312, + "grad_norm": 0.6234619617462158, + "learning_rate": 1.4059304232475098e-05, + "loss": 0.1144, + "step": 760 + }, + { + "epoch": 1.4805023132848645, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.3796064220229765e-05, + "loss": 0.1249, + "step": 770 + }, + { + "epoch": 1.4997296160547977, + "grad_norm": 0.6258341073989868, + "learning_rate": 1.3532241818404156e-05, + "loss": 0.1321, + "step": 780 + }, + { + "epoch": 1.5189569188247312, + "grad_norm": 0.5723307728767395, + "learning_rate": 1.326795557623022e-05, + "loss": 0.1193, + "step": 790 + }, + { + "epoch": 1.5381842215946644, + "grad_norm": 0.7454131841659546, + "learning_rate": 1.300332425136769e-05, + "loss": 0.1281, + "step": 800 + }, + { + "epoch": 1.5574115243645976, + "grad_norm": 0.5975070595741272, + "learning_rate": 1.273846675654003e-05, + "loss": 0.1321, + "step": 810 + }, + { + "epoch": 1.576638827134531, + "grad_norm": 0.7056507468223572, + "learning_rate": 1.2473502106100723e-05, + "loss": 0.1444, + "step": 820 + }, + { + "epoch": 1.5958661299044643, + "grad_norm": 0.7889280915260315, + "learning_rate": 1.2208549362553885e-05, + "loss": 0.1226, + "step": 830 + }, + { + "epoch": 1.6150934326743975, + "grad_norm": 0.7041313648223877, + "learning_rate": 1.194372758305325e-05, + "loss": 0.1316, + "step": 840 + }, + { + "epoch": 1.634320735444331, + "grad_norm": 0.7797935605049133, + "learning_rate": 1.1679155765903524e-05, + "loss": 0.132, + "step": 850 + }, + { + "epoch": 1.6535480382142642, + "grad_norm": 0.6426231861114502, + "learning_rate": 1.1414952797088248e-05, + "loss": 0.1101, + "step": 860 + }, + { + "epoch": 1.663161689599231, + "eval_loss": 0.10293085128068924, + "eval_runtime": 203.1567, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 5.277, + "step": 865 + }, + { + "epoch": 1.6727753409841974, + "grad_norm": 1.0461760759353638, + "learning_rate": 1.1151237396848058e-05, + "loss": 0.128, + "step": 870 + }, + { + "epoch": 1.692002643754131, + "grad_norm": 0.8692240118980408, + "learning_rate": 1.088812806633349e-05, + "loss": 0.1114, + "step": 880 + }, + { + "epoch": 1.7112299465240641, + "grad_norm": 0.5583866238594055, + "learning_rate": 1.0625743034356183e-05, + "loss": 0.1309, + "step": 890 + }, + { + "epoch": 1.7304572492939974, + "grad_norm": 0.5476118922233582, + "learning_rate": 1.0364200204262473e-05, + "loss": 0.1156, + "step": 900 + }, + { + "epoch": 1.7496845520639308, + "grad_norm": 0.8960713148117065, + "learning_rate": 1.0103617100953274e-05, + "loss": 0.1305, + "step": 910 + }, + { + "epoch": 1.768911854833864, + "grad_norm": 0.6927953958511353, + "learning_rate": 9.84411081807393e-06, + "loss": 0.1245, + "step": 920 + }, + { + "epoch": 1.7881391576037973, + "grad_norm": 0.5891989469528198, + "learning_rate": 9.585797965397949e-06, + "loss": 0.1125, + "step": 930 + }, + { + "epoch": 1.8073664603737307, + "grad_norm": 0.8319947123527527, + "learning_rate": 9.328794616428092e-06, + "loss": 0.1462, + "step": 940 + }, + { + "epoch": 1.826593763143664, + "grad_norm": 0.7439499497413635, + "learning_rate": 9.073216256238485e-06, + "loss": 0.1167, + "step": 950 + }, + { + "epoch": 1.8458210659135972, + "grad_norm": 0.7593638896942139, + "learning_rate": 8.8191777295811e-06, + "loss": 0.1356, + "step": 960 + }, + { + "epoch": 1.8650483686835306, + "grad_norm": 0.871376097202301, + "learning_rate": 8.56679318928e-06, + "loss": 0.1173, + "step": 970 + }, + { + "epoch": 1.884275671453464, + "grad_norm": 0.8772872090339661, + "learning_rate": 8.31617604493651e-06, + "loss": 0.1347, + "step": 980 + }, + { + "epoch": 1.903502974223397, + "grad_norm": 0.6309168934822083, + "learning_rate": 8.067438911968305e-06, + "loss": 0.1382, + "step": 990 + }, + { + "epoch": 1.9227302769933305, + "grad_norm": 0.775113046169281, + "learning_rate": 7.820693561005429e-06, + "loss": 0.1368, + "step": 1000 + }, + { + "epoch": 1.941957579763264, + "grad_norm": 0.9096739888191223, + "learning_rate": 7.576050867665876e-06, + "loss": 0.1263, + "step": 1010 + }, + { + "epoch": 1.961184882533197, + "grad_norm": 0.7637848258018494, + "learning_rate": 7.333620762733376e-06, + "loss": 0.1148, + "step": 1020 + }, + { + "epoch": 1.9804121853031305, + "grad_norm": 0.8084997534751892, + "learning_rate": 7.0935121827597245e-06, + "loss": 0.1457, + "step": 1030 + }, + { + "epoch": 1.995794027519077, + "eval_loss": 0.10069960355758667, + "eval_runtime": 203.0573, + "eval_samples_per_second": 5.279, + "eval_steps_per_second": 5.279, + "step": 1038 + }, + { + "epoch": 1.999639488073064, + "grad_norm": 1.0884274244308472, + "learning_rate": 6.855833021113886e-06, + "loss": 0.1641, + "step": 1040 + }, + { + "epoch": 2.018866790842997, + "grad_norm": 0.702237069606781, + "learning_rate": 6.620690079499835e-06, + "loss": 0.1159, + "step": 1050 + }, + { + "epoch": 2.0380940936129304, + "grad_norm": 0.6377178430557251, + "learning_rate": 6.388189019964976e-06, + "loss": 0.1103, + "step": 1060 + }, + { + "epoch": 2.057321396382864, + "grad_norm": 0.8843504786491394, + "learning_rate": 6.158434317420636e-06, + "loss": 0.1178, + "step": 1070 + }, + { + "epoch": 2.076548699152797, + "grad_norm": 0.42746174335479736, + "learning_rate": 5.931529212695996e-06, + "loss": 0.1143, + "step": 1080 + }, + { + "epoch": 2.0957760019227303, + "grad_norm": 0.7449749708175659, + "learning_rate": 5.70757566614661e-06, + "loss": 0.1262, + "step": 1090 + }, + { + "epoch": 2.1150033046926637, + "grad_norm": 0.6538805961608887, + "learning_rate": 5.48667431183824e-06, + "loss": 0.1344, + "step": 1100 + }, + { + "epoch": 2.1342306074625967, + "grad_norm": 0.8034993410110474, + "learning_rate": 5.268924412326709e-06, + "loss": 0.1447, + "step": 1110 + }, + { + "epoch": 2.15345791023253, + "grad_norm": 0.7438477277755737, + "learning_rate": 5.054423814054049e-06, + "loss": 0.1082, + "step": 1120 + }, + { + "epoch": 2.1726852130024636, + "grad_norm": 0.5646623373031616, + "learning_rate": 4.843268903380932e-06, + "loss": 0.1199, + "step": 1130 + }, + { + "epoch": 2.1919125157723967, + "grad_norm": 0.9965047240257263, + "learning_rate": 4.6355545632752575e-06, + "loss": 0.1303, + "step": 1140 + }, + { + "epoch": 2.21113981854233, + "grad_norm": 0.8709131479263306, + "learning_rate": 4.4313741306762495e-06, + "loss": 0.1107, + "step": 1150 + }, + { + "epoch": 2.2303671213122636, + "grad_norm": 0.6653530597686768, + "learning_rate": 4.230819354553279e-06, + "loss": 0.1053, + "step": 1160 + }, + { + "epoch": 2.2495944240821966, + "grad_norm": 0.766173243522644, + "learning_rate": 4.033980354678239e-06, + "loss": 0.1017, + "step": 1170 + }, + { + "epoch": 2.26882172685213, + "grad_norm": 0.5112572312355042, + "learning_rate": 3.840945581130008e-06, + "loss": 0.109, + "step": 1180 + }, + { + "epoch": 2.2880490296220635, + "grad_norm": 0.8744060397148132, + "learning_rate": 3.651801774549213e-06, + "loss": 0.1026, + "step": 1190 + }, + { + "epoch": 2.3072763323919965, + "grad_norm": 0.8215727806091309, + "learning_rate": 3.4666339271610836e-06, + "loss": 0.1058, + "step": 1200 + }, + { + "epoch": 2.32650363516193, + "grad_norm": 0.6597920656204224, + "learning_rate": 3.285525244584017e-06, + "loss": 0.1378, + "step": 1210 + }, + { + "epoch": 2.3284263654389235, + "eval_loss": 0.10013294219970703, + "eval_runtime": 203.5302, + "eval_samples_per_second": 5.267, + "eval_steps_per_second": 5.267, + "step": 1211 + }, + { + "epoch": 2.3457309379318634, + "grad_norm": 0.7206103205680847, + "learning_rate": 3.108557108440914e-06, + "loss": 0.1028, + "step": 1220 + }, + { + "epoch": 2.3649582407017964, + "grad_norm": 0.968497097492218, + "learning_rate": 2.9358090397901634e-06, + "loss": 0.1345, + "step": 1230 + }, + { + "epoch": 2.38418554347173, + "grad_norm": 0.7522798180580139, + "learning_rate": 2.767358663392658e-06, + "loss": 0.1029, + "step": 1240 + }, + { + "epoch": 2.4034128462416633, + "grad_norm": 0.8699542284011841, + "learning_rate": 2.6032816728309166e-06, + "loss": 0.1181, + "step": 1250 + }, + { + "epoch": 2.4226401490115963, + "grad_norm": 0.8779841661453247, + "learning_rate": 2.4436517964960005e-06, + "loss": 0.1028, + "step": 1260 + }, + { + "epoch": 2.4418674517815298, + "grad_norm": 0.6922764182090759, + "learning_rate": 2.2885407644574696e-06, + "loss": 0.1148, + "step": 1270 + }, + { + "epoch": 2.461094754551463, + "grad_norm": 0.7528237700462341, + "learning_rate": 2.1380182762313238e-06, + "loss": 0.1128, + "step": 1280 + }, + { + "epoch": 2.480322057321396, + "grad_norm": 0.8349286913871765, + "learning_rate": 1.992151969460333e-06, + "loss": 0.1027, + "step": 1290 + }, + { + "epoch": 2.4995493600913297, + "grad_norm": 0.8040717244148254, + "learning_rate": 1.8510073895209131e-06, + "loss": 0.1001, + "step": 1300 + }, + { + "epoch": 2.518776662861263, + "grad_norm": 0.8065551519393921, + "learning_rate": 1.7146479600701565e-06, + "loss": 0.1454, + "step": 1310 + }, + { + "epoch": 2.538003965631196, + "grad_norm": 0.7855721712112427, + "learning_rate": 1.5831349545462461e-06, + "loss": 0.1063, + "step": 1320 + }, + { + "epoch": 2.5572312684011296, + "grad_norm": 0.9087608456611633, + "learning_rate": 1.4565274686351022e-06, + "loss": 0.1155, + "step": 1330 + }, + { + "epoch": 2.576458571171063, + "grad_norm": 0.49701324105262756, + "learning_rate": 1.334882393715585e-06, + "loss": 0.1001, + "step": 1340 + }, + { + "epoch": 2.5956858739409965, + "grad_norm": 0.7943114638328552, + "learning_rate": 1.2182543912952178e-06, + "loss": 0.1107, + "step": 1350 + }, + { + "epoch": 2.6149131767109295, + "grad_norm": 0.8685261607170105, + "learning_rate": 1.1066958684479074e-06, + "loss": 0.1209, + "step": 1360 + }, + { + "epoch": 2.634140479480863, + "grad_norm": 1.0667730569839478, + "learning_rate": 1.0002569542646973e-06, + "loss": 0.1361, + "step": 1370 + }, + { + "epoch": 2.653367782250796, + "grad_norm": 0.6879278421401978, + "learning_rate": 8.989854773281486e-07, + "loss": 0.0925, + "step": 1380 + }, + { + "epoch": 2.6610587033587696, + "eval_loss": 0.09926149994134903, + "eval_runtime": 203.0153, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 1384 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.209436026569687e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1384/training_args.bin b/checkpoint-1384/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-1384/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-1557/README.md b/checkpoint-1557/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1557/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1557/adapter_config.json b/checkpoint-1557/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-1557/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1557/adapter_model.safetensors b/checkpoint-1557/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78727f6b520aa67aafce8f3dec223af873281e44 --- /dev/null +++ b/checkpoint-1557/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ca1e0f2440a8f2c57b47d266efd302c27cbb5d8554f035eb73bdd8c42045c0 +size 54560368 diff --git a/checkpoint-1557/optimizer.pt b/checkpoint-1557/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a260fb0d0810d10148aa18077e10767788e76f86 --- /dev/null +++ b/checkpoint-1557/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c8547875a6b5ca9fb7476acf86051d9b180bee6d131b58a16868d629833deb +size 109267450 diff --git a/checkpoint-1557/rng_state.pth b/checkpoint-1557/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..89bb526d6e11d490795944b7fdbbabc32b96bf2d --- /dev/null +++ b/checkpoint-1557/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f05b77add0ee440084527f60e51a6dc456fe89bd1f7c856c97bd0d722be225c +size 14244 diff --git a/checkpoint-1557/scheduler.pt b/checkpoint-1557/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdb3315e91b5bd426fe0f1a63a4dddd4f7b0e082 --- /dev/null +++ b/checkpoint-1557/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9acbe683318945beee4e754886d42f253a03c21c8be62cde8d52e491063ec170 +size 1064 diff --git a/checkpoint-1557/special_tokens_map.json b/checkpoint-1557/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1557/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1557/tokenizer.json b/checkpoint-1557/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1557/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1557/tokenizer_config.json b/checkpoint-1557/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1557/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1557/trainer_state.json b/checkpoint-1557/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bb24ee0356f88b12bfd67f77f96f7d8a5b527f65 --- /dev/null +++ b/checkpoint-1557/trainer_state.json @@ -0,0 +1,1190 @@ +{ + "best_metric": 0.09926149994134903, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-1384", + "epoch": 2.9936910412786157, + "eval_steps": 173, + "global_step": 1557, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + }, + { + "epoch": 1.3459111938953314, + "grad_norm": 0.5583875775337219, + "learning_rate": 1.5619915026250646e-05, + "loss": 0.1141, + "step": 700 + }, + { + "epoch": 1.3651384966652647, + "grad_norm": 0.5790243148803711, + "learning_rate": 1.536264051815491e-05, + "loss": 0.1326, + "step": 710 + }, + { + "epoch": 1.3843657994351979, + "grad_norm": 0.7467628121376038, + "learning_rate": 1.5104079675644706e-05, + "loss": 0.1439, + "step": 720 + }, + { + "epoch": 1.4035931022051313, + "grad_norm": 0.9867657423019409, + "learning_rate": 1.4844348683657616e-05, + "loss": 0.1385, + "step": 730 + }, + { + "epoch": 1.4228204049750646, + "grad_norm": 0.7909297347068787, + "learning_rate": 1.4583564252940735e-05, + "loss": 0.1259, + "step": 740 + }, + { + "epoch": 1.4420477077449978, + "grad_norm": 0.6159791350364685, + "learning_rate": 1.432184356760637e-05, + "loss": 0.1126, + "step": 750 + }, + { + "epoch": 1.4612750105149312, + "grad_norm": 0.6234619617462158, + "learning_rate": 1.4059304232475098e-05, + "loss": 0.1144, + "step": 760 + }, + { + "epoch": 1.4805023132848645, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.3796064220229765e-05, + "loss": 0.1249, + "step": 770 + }, + { + "epoch": 1.4997296160547977, + "grad_norm": 0.6258341073989868, + "learning_rate": 1.3532241818404156e-05, + "loss": 0.1321, + "step": 780 + }, + { + "epoch": 1.5189569188247312, + "grad_norm": 0.5723307728767395, + "learning_rate": 1.326795557623022e-05, + "loss": 0.1193, + "step": 790 + }, + { + "epoch": 1.5381842215946644, + "grad_norm": 0.7454131841659546, + "learning_rate": 1.300332425136769e-05, + "loss": 0.1281, + "step": 800 + }, + { + "epoch": 1.5574115243645976, + "grad_norm": 0.5975070595741272, + "learning_rate": 1.273846675654003e-05, + "loss": 0.1321, + "step": 810 + }, + { + "epoch": 1.576638827134531, + "grad_norm": 0.7056507468223572, + "learning_rate": 1.2473502106100723e-05, + "loss": 0.1444, + "step": 820 + }, + { + "epoch": 1.5958661299044643, + "grad_norm": 0.7889280915260315, + "learning_rate": 1.2208549362553885e-05, + "loss": 0.1226, + "step": 830 + }, + { + "epoch": 1.6150934326743975, + "grad_norm": 0.7041313648223877, + "learning_rate": 1.194372758305325e-05, + "loss": 0.1316, + "step": 840 + }, + { + "epoch": 1.634320735444331, + "grad_norm": 0.7797935605049133, + "learning_rate": 1.1679155765903524e-05, + "loss": 0.132, + "step": 850 + }, + { + "epoch": 1.6535480382142642, + "grad_norm": 0.6426231861114502, + "learning_rate": 1.1414952797088248e-05, + "loss": 0.1101, + "step": 860 + }, + { + "epoch": 1.663161689599231, + "eval_loss": 0.10293085128068924, + "eval_runtime": 203.1567, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 5.277, + "step": 865 + }, + { + "epoch": 1.6727753409841974, + "grad_norm": 1.0461760759353638, + "learning_rate": 1.1151237396848058e-05, + "loss": 0.128, + "step": 870 + }, + { + "epoch": 1.692002643754131, + "grad_norm": 0.8692240118980408, + "learning_rate": 1.088812806633349e-05, + "loss": 0.1114, + "step": 880 + }, + { + "epoch": 1.7112299465240641, + "grad_norm": 0.5583866238594055, + "learning_rate": 1.0625743034356183e-05, + "loss": 0.1309, + "step": 890 + }, + { + "epoch": 1.7304572492939974, + "grad_norm": 0.5476118922233582, + "learning_rate": 1.0364200204262473e-05, + "loss": 0.1156, + "step": 900 + }, + { + "epoch": 1.7496845520639308, + "grad_norm": 0.8960713148117065, + "learning_rate": 1.0103617100953274e-05, + "loss": 0.1305, + "step": 910 + }, + { + "epoch": 1.768911854833864, + "grad_norm": 0.6927953958511353, + "learning_rate": 9.84411081807393e-06, + "loss": 0.1245, + "step": 920 + }, + { + "epoch": 1.7881391576037973, + "grad_norm": 0.5891989469528198, + "learning_rate": 9.585797965397949e-06, + "loss": 0.1125, + "step": 930 + }, + { + "epoch": 1.8073664603737307, + "grad_norm": 0.8319947123527527, + "learning_rate": 9.328794616428092e-06, + "loss": 0.1462, + "step": 940 + }, + { + "epoch": 1.826593763143664, + "grad_norm": 0.7439499497413635, + "learning_rate": 9.073216256238485e-06, + "loss": 0.1167, + "step": 950 + }, + { + "epoch": 1.8458210659135972, + "grad_norm": 0.7593638896942139, + "learning_rate": 8.8191777295811e-06, + "loss": 0.1356, + "step": 960 + }, + { + "epoch": 1.8650483686835306, + "grad_norm": 0.871376097202301, + "learning_rate": 8.56679318928e-06, + "loss": 0.1173, + "step": 970 + }, + { + "epoch": 1.884275671453464, + "grad_norm": 0.8772872090339661, + "learning_rate": 8.31617604493651e-06, + "loss": 0.1347, + "step": 980 + }, + { + "epoch": 1.903502974223397, + "grad_norm": 0.6309168934822083, + "learning_rate": 8.067438911968305e-06, + "loss": 0.1382, + "step": 990 + }, + { + "epoch": 1.9227302769933305, + "grad_norm": 0.775113046169281, + "learning_rate": 7.820693561005429e-06, + "loss": 0.1368, + "step": 1000 + }, + { + "epoch": 1.941957579763264, + "grad_norm": 0.9096739888191223, + "learning_rate": 7.576050867665876e-06, + "loss": 0.1263, + "step": 1010 + }, + { + "epoch": 1.961184882533197, + "grad_norm": 0.7637848258018494, + "learning_rate": 7.333620762733376e-06, + "loss": 0.1148, + "step": 1020 + }, + { + "epoch": 1.9804121853031305, + "grad_norm": 0.8084997534751892, + "learning_rate": 7.0935121827597245e-06, + "loss": 0.1457, + "step": 1030 + }, + { + "epoch": 1.995794027519077, + "eval_loss": 0.10069960355758667, + "eval_runtime": 203.0573, + "eval_samples_per_second": 5.279, + "eval_steps_per_second": 5.279, + "step": 1038 + }, + { + "epoch": 1.999639488073064, + "grad_norm": 1.0884274244308472, + "learning_rate": 6.855833021113886e-06, + "loss": 0.1641, + "step": 1040 + }, + { + "epoch": 2.018866790842997, + "grad_norm": 0.702237069606781, + "learning_rate": 6.620690079499835e-06, + "loss": 0.1159, + "step": 1050 + }, + { + "epoch": 2.0380940936129304, + "grad_norm": 0.6377178430557251, + "learning_rate": 6.388189019964976e-06, + "loss": 0.1103, + "step": 1060 + }, + { + "epoch": 2.057321396382864, + "grad_norm": 0.8843504786491394, + "learning_rate": 6.158434317420636e-06, + "loss": 0.1178, + "step": 1070 + }, + { + "epoch": 2.076548699152797, + "grad_norm": 0.42746174335479736, + "learning_rate": 5.931529212695996e-06, + "loss": 0.1143, + "step": 1080 + }, + { + "epoch": 2.0957760019227303, + "grad_norm": 0.7449749708175659, + "learning_rate": 5.70757566614661e-06, + "loss": 0.1262, + "step": 1090 + }, + { + "epoch": 2.1150033046926637, + "grad_norm": 0.6538805961608887, + "learning_rate": 5.48667431183824e-06, + "loss": 0.1344, + "step": 1100 + }, + { + "epoch": 2.1342306074625967, + "grad_norm": 0.8034993410110474, + "learning_rate": 5.268924412326709e-06, + "loss": 0.1447, + "step": 1110 + }, + { + "epoch": 2.15345791023253, + "grad_norm": 0.7438477277755737, + "learning_rate": 5.054423814054049e-06, + "loss": 0.1082, + "step": 1120 + }, + { + "epoch": 2.1726852130024636, + "grad_norm": 0.5646623373031616, + "learning_rate": 4.843268903380932e-06, + "loss": 0.1199, + "step": 1130 + }, + { + "epoch": 2.1919125157723967, + "grad_norm": 0.9965047240257263, + "learning_rate": 4.6355545632752575e-06, + "loss": 0.1303, + "step": 1140 + }, + { + "epoch": 2.21113981854233, + "grad_norm": 0.8709131479263306, + "learning_rate": 4.4313741306762495e-06, + "loss": 0.1107, + "step": 1150 + }, + { + "epoch": 2.2303671213122636, + "grad_norm": 0.6653530597686768, + "learning_rate": 4.230819354553279e-06, + "loss": 0.1053, + "step": 1160 + }, + { + "epoch": 2.2495944240821966, + "grad_norm": 0.766173243522644, + "learning_rate": 4.033980354678239e-06, + "loss": 0.1017, + "step": 1170 + }, + { + "epoch": 2.26882172685213, + "grad_norm": 0.5112572312355042, + "learning_rate": 3.840945581130008e-06, + "loss": 0.109, + "step": 1180 + }, + { + "epoch": 2.2880490296220635, + "grad_norm": 0.8744060397148132, + "learning_rate": 3.651801774549213e-06, + "loss": 0.1026, + "step": 1190 + }, + { + "epoch": 2.3072763323919965, + "grad_norm": 0.8215727806091309, + "learning_rate": 3.4666339271610836e-06, + "loss": 0.1058, + "step": 1200 + }, + { + "epoch": 2.32650363516193, + "grad_norm": 0.6597920656204224, + "learning_rate": 3.285525244584017e-06, + "loss": 0.1378, + "step": 1210 + }, + { + "epoch": 2.3284263654389235, + "eval_loss": 0.10013294219970703, + "eval_runtime": 203.5302, + "eval_samples_per_second": 5.267, + "eval_steps_per_second": 5.267, + "step": 1211 + }, + { + "epoch": 2.3457309379318634, + "grad_norm": 0.7206103205680847, + "learning_rate": 3.108557108440914e-06, + "loss": 0.1028, + "step": 1220 + }, + { + "epoch": 2.3649582407017964, + "grad_norm": 0.968497097492218, + "learning_rate": 2.9358090397901634e-06, + "loss": 0.1345, + "step": 1230 + }, + { + "epoch": 2.38418554347173, + "grad_norm": 0.7522798180580139, + "learning_rate": 2.767358663392658e-06, + "loss": 0.1029, + "step": 1240 + }, + { + "epoch": 2.4034128462416633, + "grad_norm": 0.8699542284011841, + "learning_rate": 2.6032816728309166e-06, + "loss": 0.1181, + "step": 1250 + }, + { + "epoch": 2.4226401490115963, + "grad_norm": 0.8779841661453247, + "learning_rate": 2.4436517964960005e-06, + "loss": 0.1028, + "step": 1260 + }, + { + "epoch": 2.4418674517815298, + "grad_norm": 0.6922764182090759, + "learning_rate": 2.2885407644574696e-06, + "loss": 0.1148, + "step": 1270 + }, + { + "epoch": 2.461094754551463, + "grad_norm": 0.7528237700462341, + "learning_rate": 2.1380182762313238e-06, + "loss": 0.1128, + "step": 1280 + }, + { + "epoch": 2.480322057321396, + "grad_norm": 0.8349286913871765, + "learning_rate": 1.992151969460333e-06, + "loss": 0.1027, + "step": 1290 + }, + { + "epoch": 2.4995493600913297, + "grad_norm": 0.8040717244148254, + "learning_rate": 1.8510073895209131e-06, + "loss": 0.1001, + "step": 1300 + }, + { + "epoch": 2.518776662861263, + "grad_norm": 0.8065551519393921, + "learning_rate": 1.7146479600701565e-06, + "loss": 0.1454, + "step": 1310 + }, + { + "epoch": 2.538003965631196, + "grad_norm": 0.7855721712112427, + "learning_rate": 1.5831349545462461e-06, + "loss": 0.1063, + "step": 1320 + }, + { + "epoch": 2.5572312684011296, + "grad_norm": 0.9087608456611633, + "learning_rate": 1.4565274686351022e-06, + "loss": 0.1155, + "step": 1330 + }, + { + "epoch": 2.576458571171063, + "grad_norm": 0.49701324105262756, + "learning_rate": 1.334882393715585e-06, + "loss": 0.1001, + "step": 1340 + }, + { + "epoch": 2.5956858739409965, + "grad_norm": 0.7943114638328552, + "learning_rate": 1.2182543912952178e-06, + "loss": 0.1107, + "step": 1350 + }, + { + "epoch": 2.6149131767109295, + "grad_norm": 0.8685261607170105, + "learning_rate": 1.1066958684479074e-06, + "loss": 0.1209, + "step": 1360 + }, + { + "epoch": 2.634140479480863, + "grad_norm": 1.0667730569839478, + "learning_rate": 1.0002569542646973e-06, + "loss": 0.1361, + "step": 1370 + }, + { + "epoch": 2.653367782250796, + "grad_norm": 0.6879278421401978, + "learning_rate": 8.989854773281486e-07, + "loss": 0.0925, + "step": 1380 + }, + { + "epoch": 2.6610587033587696, + "eval_loss": 0.09926149994134903, + "eval_runtime": 203.0153, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 1384 + }, + { + "epoch": 2.6725950850207294, + "grad_norm": 0.7204756736755371, + "learning_rate": 8.029269442204348e-07, + "loss": 0.1148, + "step": 1390 + }, + { + "epoch": 2.691822387790663, + "grad_norm": 0.834997832775116, + "learning_rate": 7.121245190748708e-07, + "loss": 0.0918, + "step": 1400 + }, + { + "epoch": 2.7110496905605963, + "grad_norm": 0.8163384795188904, + "learning_rate": 6.266190041799805e-07, + "loss": 0.1345, + "step": 1410 + }, + { + "epoch": 2.7302769933305293, + "grad_norm": 0.6108123660087585, + "learning_rate": 5.464488216449154e-07, + "loss": 0.1235, + "step": 1420 + }, + { + "epoch": 2.7495042961004628, + "grad_norm": 0.8302232027053833, + "learning_rate": 4.716499961343698e-07, + "loss": 0.1163, + "step": 1430 + }, + { + "epoch": 2.7687315988703958, + "grad_norm": 0.670668363571167, + "learning_rate": 4.022561386808177e-07, + "loss": 0.1103, + "step": 1440 + }, + { + "epoch": 2.7879589016403292, + "grad_norm": 0.7220197319984436, + "learning_rate": 3.3829843158131175e-07, + "loss": 0.1228, + "step": 1450 + }, + { + "epoch": 2.8071862044102627, + "grad_norm": 0.5018804669380188, + "learning_rate": 2.798056143856462e-07, + "loss": 0.1225, + "step": 1460 + }, + { + "epoch": 2.826413507180196, + "grad_norm": 0.5343906283378601, + "learning_rate": 2.268039709821687e-07, + "loss": 0.0918, + "step": 1470 + }, + { + "epoch": 2.845640809950129, + "grad_norm": 0.6775656938552856, + "learning_rate": 1.7931731778705052e-07, + "loss": 0.0903, + "step": 1480 + }, + { + "epoch": 2.8648681127200626, + "grad_norm": 0.7841689586639404, + "learning_rate": 1.373669930423288e-07, + "loss": 0.1308, + "step": 1490 + }, + { + "epoch": 2.8840954154899956, + "grad_norm": 0.8570185303688049, + "learning_rate": 1.0097184722750592e-07, + "loss": 0.1287, + "step": 1500 + }, + { + "epoch": 2.903322718259929, + "grad_norm": 0.635200023651123, + "learning_rate": 7.014823458905001e-08, + "loss": 0.1011, + "step": 1510 + }, + { + "epoch": 2.9225500210298625, + "grad_norm": 0.7127873301506042, + "learning_rate": 4.4910005791570786e-08, + "loss": 0.1345, + "step": 1520 + }, + { + "epoch": 2.941777323799796, + "grad_norm": 0.9114808440208435, + "learning_rate": 2.526850169399103e-08, + "loss": 0.1132, + "step": 1530 + }, + { + "epoch": 2.961004626569729, + "grad_norm": 0.7554405927658081, + "learning_rate": 1.1232548253503616e-08, + "loss": 0.1091, + "step": 1540 + }, + { + "epoch": 2.9802319293396624, + "grad_norm": 0.7547165155410767, + "learning_rate": 2.8084525596064337e-09, + "loss": 0.0944, + "step": 1550 + }, + { + "epoch": 2.9936910412786157, + "eval_loss": 0.09935057163238525, + "eval_runtime": 203.0468, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 1557 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3597578280412774e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1557/training_args.bin b/checkpoint-1557/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-1557/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-1560/README.md b/checkpoint-1560/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-1560/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1560/adapter_config.json b/checkpoint-1560/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-1560/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1560/adapter_model.safetensors b/checkpoint-1560/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02f742d1d08f382f26b89ee1b441b41ddfc4ef36 --- /dev/null +++ b/checkpoint-1560/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae346f48bf371227e763693785b0e3ca783b9e23e1632d2d0afb8c67d8df207 +size 54560368 diff --git a/checkpoint-1560/optimizer.pt b/checkpoint-1560/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eff42a2ef39467eb192f26a63c41a28c776b252d --- /dev/null +++ b/checkpoint-1560/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06434039119c4b58f4d7d7f8ed1cccf9c9a89f0a75cd2cc127749c311827ff8 +size 109267450 diff --git a/checkpoint-1560/rng_state.pth b/checkpoint-1560/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e4de1dd7e230840295940a6fcba1c4c639b8962 --- /dev/null +++ b/checkpoint-1560/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee19540ee60e62653b9280737702b5741df0629dd2bb39e1766d6d7423336f36 +size 14244 diff --git a/checkpoint-1560/scheduler.pt b/checkpoint-1560/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eedb63c65cb0873641321abda22b7038a966f1d4 --- /dev/null +++ b/checkpoint-1560/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f11e685a8a38bdf251703c7596138297d1bd1178cb0ee9a798fcd62f486db34 +size 1064 diff --git a/checkpoint-1560/special_tokens_map.json b/checkpoint-1560/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-1560/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1560/tokenizer.json b/checkpoint-1560/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1560/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1560/tokenizer_config.json b/checkpoint-1560/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-1560/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1560/trainer_state.json b/checkpoint-1560/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0531910bf6e7b01f2f9e45d9233b5807049ffa4 --- /dev/null +++ b/checkpoint-1560/trainer_state.json @@ -0,0 +1,1197 @@ +{ + "best_metric": 0.09926149994134903, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-1384", + "epoch": 2.9994592321095954, + "eval_steps": 173, + "global_step": 1560, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + }, + { + "epoch": 1.3459111938953314, + "grad_norm": 0.5583875775337219, + "learning_rate": 1.5619915026250646e-05, + "loss": 0.1141, + "step": 700 + }, + { + "epoch": 1.3651384966652647, + "grad_norm": 0.5790243148803711, + "learning_rate": 1.536264051815491e-05, + "loss": 0.1326, + "step": 710 + }, + { + "epoch": 1.3843657994351979, + "grad_norm": 0.7467628121376038, + "learning_rate": 1.5104079675644706e-05, + "loss": 0.1439, + "step": 720 + }, + { + "epoch": 1.4035931022051313, + "grad_norm": 0.9867657423019409, + "learning_rate": 1.4844348683657616e-05, + "loss": 0.1385, + "step": 730 + }, + { + "epoch": 1.4228204049750646, + "grad_norm": 0.7909297347068787, + "learning_rate": 1.4583564252940735e-05, + "loss": 0.1259, + "step": 740 + }, + { + "epoch": 1.4420477077449978, + "grad_norm": 0.6159791350364685, + "learning_rate": 1.432184356760637e-05, + "loss": 0.1126, + "step": 750 + }, + { + "epoch": 1.4612750105149312, + "grad_norm": 0.6234619617462158, + "learning_rate": 1.4059304232475098e-05, + "loss": 0.1144, + "step": 760 + }, + { + "epoch": 1.4805023132848645, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.3796064220229765e-05, + "loss": 0.1249, + "step": 770 + }, + { + "epoch": 1.4997296160547977, + "grad_norm": 0.6258341073989868, + "learning_rate": 1.3532241818404156e-05, + "loss": 0.1321, + "step": 780 + }, + { + "epoch": 1.5189569188247312, + "grad_norm": 0.5723307728767395, + "learning_rate": 1.326795557623022e-05, + "loss": 0.1193, + "step": 790 + }, + { + "epoch": 1.5381842215946644, + "grad_norm": 0.7454131841659546, + "learning_rate": 1.300332425136769e-05, + "loss": 0.1281, + "step": 800 + }, + { + "epoch": 1.5574115243645976, + "grad_norm": 0.5975070595741272, + "learning_rate": 1.273846675654003e-05, + "loss": 0.1321, + "step": 810 + }, + { + "epoch": 1.576638827134531, + "grad_norm": 0.7056507468223572, + "learning_rate": 1.2473502106100723e-05, + "loss": 0.1444, + "step": 820 + }, + { + "epoch": 1.5958661299044643, + "grad_norm": 0.7889280915260315, + "learning_rate": 1.2208549362553885e-05, + "loss": 0.1226, + "step": 830 + }, + { + "epoch": 1.6150934326743975, + "grad_norm": 0.7041313648223877, + "learning_rate": 1.194372758305325e-05, + "loss": 0.1316, + "step": 840 + }, + { + "epoch": 1.634320735444331, + "grad_norm": 0.7797935605049133, + "learning_rate": 1.1679155765903524e-05, + "loss": 0.132, + "step": 850 + }, + { + "epoch": 1.6535480382142642, + "grad_norm": 0.6426231861114502, + "learning_rate": 1.1414952797088248e-05, + "loss": 0.1101, + "step": 860 + }, + { + "epoch": 1.663161689599231, + "eval_loss": 0.10293085128068924, + "eval_runtime": 203.1567, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 5.277, + "step": 865 + }, + { + "epoch": 1.6727753409841974, + "grad_norm": 1.0461760759353638, + "learning_rate": 1.1151237396848058e-05, + "loss": 0.128, + "step": 870 + }, + { + "epoch": 1.692002643754131, + "grad_norm": 0.8692240118980408, + "learning_rate": 1.088812806633349e-05, + "loss": 0.1114, + "step": 880 + }, + { + "epoch": 1.7112299465240641, + "grad_norm": 0.5583866238594055, + "learning_rate": 1.0625743034356183e-05, + "loss": 0.1309, + "step": 890 + }, + { + "epoch": 1.7304572492939974, + "grad_norm": 0.5476118922233582, + "learning_rate": 1.0364200204262473e-05, + "loss": 0.1156, + "step": 900 + }, + { + "epoch": 1.7496845520639308, + "grad_norm": 0.8960713148117065, + "learning_rate": 1.0103617100953274e-05, + "loss": 0.1305, + "step": 910 + }, + { + "epoch": 1.768911854833864, + "grad_norm": 0.6927953958511353, + "learning_rate": 9.84411081807393e-06, + "loss": 0.1245, + "step": 920 + }, + { + "epoch": 1.7881391576037973, + "grad_norm": 0.5891989469528198, + "learning_rate": 9.585797965397949e-06, + "loss": 0.1125, + "step": 930 + }, + { + "epoch": 1.8073664603737307, + "grad_norm": 0.8319947123527527, + "learning_rate": 9.328794616428092e-06, + "loss": 0.1462, + "step": 940 + }, + { + "epoch": 1.826593763143664, + "grad_norm": 0.7439499497413635, + "learning_rate": 9.073216256238485e-06, + "loss": 0.1167, + "step": 950 + }, + { + "epoch": 1.8458210659135972, + "grad_norm": 0.7593638896942139, + "learning_rate": 8.8191777295811e-06, + "loss": 0.1356, + "step": 960 + }, + { + "epoch": 1.8650483686835306, + "grad_norm": 0.871376097202301, + "learning_rate": 8.56679318928e-06, + "loss": 0.1173, + "step": 970 + }, + { + "epoch": 1.884275671453464, + "grad_norm": 0.8772872090339661, + "learning_rate": 8.31617604493651e-06, + "loss": 0.1347, + "step": 980 + }, + { + "epoch": 1.903502974223397, + "grad_norm": 0.6309168934822083, + "learning_rate": 8.067438911968305e-06, + "loss": 0.1382, + "step": 990 + }, + { + "epoch": 1.9227302769933305, + "grad_norm": 0.775113046169281, + "learning_rate": 7.820693561005429e-06, + "loss": 0.1368, + "step": 1000 + }, + { + "epoch": 1.941957579763264, + "grad_norm": 0.9096739888191223, + "learning_rate": 7.576050867665876e-06, + "loss": 0.1263, + "step": 1010 + }, + { + "epoch": 1.961184882533197, + "grad_norm": 0.7637848258018494, + "learning_rate": 7.333620762733376e-06, + "loss": 0.1148, + "step": 1020 + }, + { + "epoch": 1.9804121853031305, + "grad_norm": 0.8084997534751892, + "learning_rate": 7.0935121827597245e-06, + "loss": 0.1457, + "step": 1030 + }, + { + "epoch": 1.995794027519077, + "eval_loss": 0.10069960355758667, + "eval_runtime": 203.0573, + "eval_samples_per_second": 5.279, + "eval_steps_per_second": 5.279, + "step": 1038 + }, + { + "epoch": 1.999639488073064, + "grad_norm": 1.0884274244308472, + "learning_rate": 6.855833021113886e-06, + "loss": 0.1641, + "step": 1040 + }, + { + "epoch": 2.018866790842997, + "grad_norm": 0.702237069606781, + "learning_rate": 6.620690079499835e-06, + "loss": 0.1159, + "step": 1050 + }, + { + "epoch": 2.0380940936129304, + "grad_norm": 0.6377178430557251, + "learning_rate": 6.388189019964976e-06, + "loss": 0.1103, + "step": 1060 + }, + { + "epoch": 2.057321396382864, + "grad_norm": 0.8843504786491394, + "learning_rate": 6.158434317420636e-06, + "loss": 0.1178, + "step": 1070 + }, + { + "epoch": 2.076548699152797, + "grad_norm": 0.42746174335479736, + "learning_rate": 5.931529212695996e-06, + "loss": 0.1143, + "step": 1080 + }, + { + "epoch": 2.0957760019227303, + "grad_norm": 0.7449749708175659, + "learning_rate": 5.70757566614661e-06, + "loss": 0.1262, + "step": 1090 + }, + { + "epoch": 2.1150033046926637, + "grad_norm": 0.6538805961608887, + "learning_rate": 5.48667431183824e-06, + "loss": 0.1344, + "step": 1100 + }, + { + "epoch": 2.1342306074625967, + "grad_norm": 0.8034993410110474, + "learning_rate": 5.268924412326709e-06, + "loss": 0.1447, + "step": 1110 + }, + { + "epoch": 2.15345791023253, + "grad_norm": 0.7438477277755737, + "learning_rate": 5.054423814054049e-06, + "loss": 0.1082, + "step": 1120 + }, + { + "epoch": 2.1726852130024636, + "grad_norm": 0.5646623373031616, + "learning_rate": 4.843268903380932e-06, + "loss": 0.1199, + "step": 1130 + }, + { + "epoch": 2.1919125157723967, + "grad_norm": 0.9965047240257263, + "learning_rate": 4.6355545632752575e-06, + "loss": 0.1303, + "step": 1140 + }, + { + "epoch": 2.21113981854233, + "grad_norm": 0.8709131479263306, + "learning_rate": 4.4313741306762495e-06, + "loss": 0.1107, + "step": 1150 + }, + { + "epoch": 2.2303671213122636, + "grad_norm": 0.6653530597686768, + "learning_rate": 4.230819354553279e-06, + "loss": 0.1053, + "step": 1160 + }, + { + "epoch": 2.2495944240821966, + "grad_norm": 0.766173243522644, + "learning_rate": 4.033980354678239e-06, + "loss": 0.1017, + "step": 1170 + }, + { + "epoch": 2.26882172685213, + "grad_norm": 0.5112572312355042, + "learning_rate": 3.840945581130008e-06, + "loss": 0.109, + "step": 1180 + }, + { + "epoch": 2.2880490296220635, + "grad_norm": 0.8744060397148132, + "learning_rate": 3.651801774549213e-06, + "loss": 0.1026, + "step": 1190 + }, + { + "epoch": 2.3072763323919965, + "grad_norm": 0.8215727806091309, + "learning_rate": 3.4666339271610836e-06, + "loss": 0.1058, + "step": 1200 + }, + { + "epoch": 2.32650363516193, + "grad_norm": 0.6597920656204224, + "learning_rate": 3.285525244584017e-06, + "loss": 0.1378, + "step": 1210 + }, + { + "epoch": 2.3284263654389235, + "eval_loss": 0.10013294219970703, + "eval_runtime": 203.5302, + "eval_samples_per_second": 5.267, + "eval_steps_per_second": 5.267, + "step": 1211 + }, + { + "epoch": 2.3457309379318634, + "grad_norm": 0.7206103205680847, + "learning_rate": 3.108557108440914e-06, + "loss": 0.1028, + "step": 1220 + }, + { + "epoch": 2.3649582407017964, + "grad_norm": 0.968497097492218, + "learning_rate": 2.9358090397901634e-06, + "loss": 0.1345, + "step": 1230 + }, + { + "epoch": 2.38418554347173, + "grad_norm": 0.7522798180580139, + "learning_rate": 2.767358663392658e-06, + "loss": 0.1029, + "step": 1240 + }, + { + "epoch": 2.4034128462416633, + "grad_norm": 0.8699542284011841, + "learning_rate": 2.6032816728309166e-06, + "loss": 0.1181, + "step": 1250 + }, + { + "epoch": 2.4226401490115963, + "grad_norm": 0.8779841661453247, + "learning_rate": 2.4436517964960005e-06, + "loss": 0.1028, + "step": 1260 + }, + { + "epoch": 2.4418674517815298, + "grad_norm": 0.6922764182090759, + "learning_rate": 2.2885407644574696e-06, + "loss": 0.1148, + "step": 1270 + }, + { + "epoch": 2.461094754551463, + "grad_norm": 0.7528237700462341, + "learning_rate": 2.1380182762313238e-06, + "loss": 0.1128, + "step": 1280 + }, + { + "epoch": 2.480322057321396, + "grad_norm": 0.8349286913871765, + "learning_rate": 1.992151969460333e-06, + "loss": 0.1027, + "step": 1290 + }, + { + "epoch": 2.4995493600913297, + "grad_norm": 0.8040717244148254, + "learning_rate": 1.8510073895209131e-06, + "loss": 0.1001, + "step": 1300 + }, + { + "epoch": 2.518776662861263, + "grad_norm": 0.8065551519393921, + "learning_rate": 1.7146479600701565e-06, + "loss": 0.1454, + "step": 1310 + }, + { + "epoch": 2.538003965631196, + "grad_norm": 0.7855721712112427, + "learning_rate": 1.5831349545462461e-06, + "loss": 0.1063, + "step": 1320 + }, + { + "epoch": 2.5572312684011296, + "grad_norm": 0.9087608456611633, + "learning_rate": 1.4565274686351022e-06, + "loss": 0.1155, + "step": 1330 + }, + { + "epoch": 2.576458571171063, + "grad_norm": 0.49701324105262756, + "learning_rate": 1.334882393715585e-06, + "loss": 0.1001, + "step": 1340 + }, + { + "epoch": 2.5956858739409965, + "grad_norm": 0.7943114638328552, + "learning_rate": 1.2182543912952178e-06, + "loss": 0.1107, + "step": 1350 + }, + { + "epoch": 2.6149131767109295, + "grad_norm": 0.8685261607170105, + "learning_rate": 1.1066958684479074e-06, + "loss": 0.1209, + "step": 1360 + }, + { + "epoch": 2.634140479480863, + "grad_norm": 1.0667730569839478, + "learning_rate": 1.0002569542646973e-06, + "loss": 0.1361, + "step": 1370 + }, + { + "epoch": 2.653367782250796, + "grad_norm": 0.6879278421401978, + "learning_rate": 8.989854773281486e-07, + "loss": 0.0925, + "step": 1380 + }, + { + "epoch": 2.6610587033587696, + "eval_loss": 0.09926149994134903, + "eval_runtime": 203.0153, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 1384 + }, + { + "epoch": 2.6725950850207294, + "grad_norm": 0.7204756736755371, + "learning_rate": 8.029269442204348e-07, + "loss": 0.1148, + "step": 1390 + }, + { + "epoch": 2.691822387790663, + "grad_norm": 0.834997832775116, + "learning_rate": 7.121245190748708e-07, + "loss": 0.0918, + "step": 1400 + }, + { + "epoch": 2.7110496905605963, + "grad_norm": 0.8163384795188904, + "learning_rate": 6.266190041799805e-07, + "loss": 0.1345, + "step": 1410 + }, + { + "epoch": 2.7302769933305293, + "grad_norm": 0.6108123660087585, + "learning_rate": 5.464488216449154e-07, + "loss": 0.1235, + "step": 1420 + }, + { + "epoch": 2.7495042961004628, + "grad_norm": 0.8302232027053833, + "learning_rate": 4.716499961343698e-07, + "loss": 0.1163, + "step": 1430 + }, + { + "epoch": 2.7687315988703958, + "grad_norm": 0.670668363571167, + "learning_rate": 4.022561386808177e-07, + "loss": 0.1103, + "step": 1440 + }, + { + "epoch": 2.7879589016403292, + "grad_norm": 0.7220197319984436, + "learning_rate": 3.3829843158131175e-07, + "loss": 0.1228, + "step": 1450 + }, + { + "epoch": 2.8071862044102627, + "grad_norm": 0.5018804669380188, + "learning_rate": 2.798056143856462e-07, + "loss": 0.1225, + "step": 1460 + }, + { + "epoch": 2.826413507180196, + "grad_norm": 0.5343906283378601, + "learning_rate": 2.268039709821687e-07, + "loss": 0.0918, + "step": 1470 + }, + { + "epoch": 2.845640809950129, + "grad_norm": 0.6775656938552856, + "learning_rate": 1.7931731778705052e-07, + "loss": 0.0903, + "step": 1480 + }, + { + "epoch": 2.8648681127200626, + "grad_norm": 0.7841689586639404, + "learning_rate": 1.373669930423288e-07, + "loss": 0.1308, + "step": 1490 + }, + { + "epoch": 2.8840954154899956, + "grad_norm": 0.8570185303688049, + "learning_rate": 1.0097184722750592e-07, + "loss": 0.1287, + "step": 1500 + }, + { + "epoch": 2.903322718259929, + "grad_norm": 0.635200023651123, + "learning_rate": 7.014823458905001e-08, + "loss": 0.1011, + "step": 1510 + }, + { + "epoch": 2.9225500210298625, + "grad_norm": 0.7127873301506042, + "learning_rate": 4.4910005791570786e-08, + "loss": 0.1345, + "step": 1520 + }, + { + "epoch": 2.941777323799796, + "grad_norm": 0.9114808440208435, + "learning_rate": 2.526850169399103e-08, + "loss": 0.1132, + "step": 1530 + }, + { + "epoch": 2.961004626569729, + "grad_norm": 0.7554405927658081, + "learning_rate": 1.1232548253503616e-08, + "loss": 0.1091, + "step": 1540 + }, + { + "epoch": 2.9802319293396624, + "grad_norm": 0.7547165155410767, + "learning_rate": 2.8084525596064337e-09, + "loss": 0.0944, + "step": 1550 + }, + { + "epoch": 2.9936910412786157, + "eval_loss": 0.09935057163238525, + "eval_runtime": 203.0468, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 1557 + }, + { + "epoch": 2.9994592321095954, + "grad_norm": 0.7488301992416382, + "learning_rate": 0.0, + "loss": 0.1237, + "step": 1560 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.3623219564340838e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1560/training_args.bin b/checkpoint-1560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-1560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-173/README.md b/checkpoint-173/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-173/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-173/adapter_config.json b/checkpoint-173/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-173/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-173/adapter_model.safetensors b/checkpoint-173/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45ecabef1a24e39a9d9445911af6ac472cbdf78c --- /dev/null +++ b/checkpoint-173/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb90972bafb08571f2671a6197b8e1b1e08cedbed1b9bf9614e971bd7b6b2862 +size 54560368 diff --git a/checkpoint-173/optimizer.pt b/checkpoint-173/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..63aaa7b882866a75b85d695bc0b9d67341efc76c --- /dev/null +++ b/checkpoint-173/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e9c5b90d662ef48b0d624bea051b8cfef7ee49064f5b377500ad4fe2e97d6b +size 109267450 diff --git a/checkpoint-173/rng_state.pth b/checkpoint-173/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..86faf29e64bae3625c28d455d5fc6d873f178531 --- /dev/null +++ b/checkpoint-173/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3eedbee67c41f5df90c3252b137b765d995890a93084d695c5ec5c273157d5 +size 14244 diff --git a/checkpoint-173/scheduler.pt b/checkpoint-173/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff80c944a56fd4a4f598aefe3fa8c0b2a8026388 --- /dev/null +++ b/checkpoint-173/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9bc5df2f90fffab7f879d1d9a8c9a5eabe7ca3f2be04e0cd5c48e5621148b4 +size 1064 diff --git a/checkpoint-173/special_tokens_map.json b/checkpoint-173/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-173/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-173/tokenizer.json b/checkpoint-173/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-173/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-173/tokenizer_config.json b/checkpoint-173/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-173/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-173/trainer_state.json b/checkpoint-173/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..be178a3efb7fafc855ee269e063323c6b567cfeb --- /dev/null +++ b/checkpoint-173/trainer_state.json @@ -0,0 +1,160 @@ +{ + "best_metric": 0.14381718635559082, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-173", + "epoch": 0.3326323379198462, + "eval_steps": 173, + "global_step": 173, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5023807483019264e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-173/training_args.bin b/checkpoint-173/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-173/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-346/README.md b/checkpoint-346/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-346/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-346/adapter_config.json b/checkpoint-346/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-346/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-346/adapter_model.safetensors b/checkpoint-346/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67c1278d93ea3a1b9b8e656dd8569cfe27246427 --- /dev/null +++ b/checkpoint-346/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347925b30c8b6101f68e7d4f035978e6b4a1d1b546d3352e55592e533dcb9568 +size 54560368 diff --git a/checkpoint-346/optimizer.pt b/checkpoint-346/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0243174ef9ea445bb40833ea301c736ac17fa9a0 --- /dev/null +++ b/checkpoint-346/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3d34826c0ac894d95d752820256d57728c531923ccb85b0c3b34b2eabed42a +size 109267450 diff --git a/checkpoint-346/rng_state.pth b/checkpoint-346/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d2577cda326f0ebae737604a7af64209325806e --- /dev/null +++ b/checkpoint-346/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7829eb75a78407031cb5165647686a79b16da157511278f0374a39f8f5e67294 +size 14244 diff --git a/checkpoint-346/scheduler.pt b/checkpoint-346/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f57f6b3e3b7a7bc57c878e7eb87474323ec57e0 --- /dev/null +++ b/checkpoint-346/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c753459170ab58c0333539acdc76cf8e49dd05b53a20df8717dfb07b0739d3b2 +size 1064 diff --git a/checkpoint-346/special_tokens_map.json b/checkpoint-346/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-346/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-346/tokenizer.json b/checkpoint-346/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-346/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-346/tokenizer_config.json b/checkpoint-346/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-346/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-346/trainer_state.json b/checkpoint-346/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..db22307056e2d62b4cead06dbb38001080adbaaa --- /dev/null +++ b/checkpoint-346/trainer_state.json @@ -0,0 +1,287 @@ +{ + "best_metric": 0.1198095753788948, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-346", + "epoch": 0.6652646758396924, + "eval_steps": 173, + "global_step": 346, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.028663287378739e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-346/training_args.bin b/checkpoint-346/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-346/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-519/README.md b/checkpoint-519/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-519/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-519/adapter_config.json b/checkpoint-519/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-519/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-519/adapter_model.safetensors b/checkpoint-519/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e335ba62786ea23f17d514097e6c8d4800c5865b --- /dev/null +++ b/checkpoint-519/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11719b17d1b1ede324a9caf6c35e858ee23bead84e4f9814d9fa221e0850e90c +size 54560368 diff --git a/checkpoint-519/optimizer.pt b/checkpoint-519/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6834db2c4533e8265a37f756a9e3d682f79d5aac --- /dev/null +++ b/checkpoint-519/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10259355b778c24d07d3c7f26106c3750d0b2a634ca44f0f254c8cbe27a0acbb +size 109267450 diff --git a/checkpoint-519/rng_state.pth b/checkpoint-519/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e19a206f3b42b8857c6cd87bf5aee3c9471462f9 --- /dev/null +++ b/checkpoint-519/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f671a4f38c9093e18e02dac7c9d22587a89899c9ff5def681945686e883b2021 +size 14244 diff --git a/checkpoint-519/scheduler.pt b/checkpoint-519/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfe5d8698d03f226a7c531b19adf06699f14ecdf --- /dev/null +++ b/checkpoint-519/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5499a8519408171a56a2db34bd09b0e9f50892220d665d95d635a6271cd50936 +size 1064 diff --git a/checkpoint-519/special_tokens_map.json b/checkpoint-519/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-519/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-519/tokenizer.json b/checkpoint-519/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-519/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-519/tokenizer_config.json b/checkpoint-519/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-519/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-519/trainer_state.json b/checkpoint-519/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eff2e72a64c7c1abfcaaa1b1b3952f79717c71cc --- /dev/null +++ b/checkpoint-519/trainer_state.json @@ -0,0 +1,414 @@ +{ + "best_metric": 0.11108512431383133, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-519", + "epoch": 0.9978970137595385, + "eval_steps": 173, + "global_step": 519, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.531686421118976e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-519/training_args.bin b/checkpoint-519/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-519/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-692/README.md b/checkpoint-692/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-692/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-692/adapter_config.json b/checkpoint-692/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-692/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-692/adapter_model.safetensors b/checkpoint-692/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69e576d513f834e8a52637c4c5f9cd92ddadc7ff --- /dev/null +++ b/checkpoint-692/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e1eefe7738a40cfa8c80d45a2a9c6829960d12ce54e4dcc1b5fbda02571ee2 +size 54560368 diff --git a/checkpoint-692/optimizer.pt b/checkpoint-692/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc9aec729b798380923124646b943f7976f1a58e --- /dev/null +++ b/checkpoint-692/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c2d71a679c4c3d589941c873a326c93e5a2f0fd2c0d0abd91e323ea23804d5 +size 109267450 diff --git a/checkpoint-692/rng_state.pth b/checkpoint-692/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0b7d3ed42c1d50abc7820e50b0277510a8e6a3a --- /dev/null +++ b/checkpoint-692/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a8c1a9536f2a2e783fa8d0753d9af17c78feb6be9829093b6d6ea5f73331ff +size 14244 diff --git a/checkpoint-692/scheduler.pt b/checkpoint-692/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dee126caa67a0ff27fbdae57e2acc058c34c28af --- /dev/null +++ b/checkpoint-692/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06de51d68cf244f891e1fd0dd09ca3ccf7c872a96acca9407cecf684b1913f20 +size 1064 diff --git a/checkpoint-692/special_tokens_map.json b/checkpoint-692/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-692/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-692/tokenizer.json b/checkpoint-692/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-692/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-692/tokenizer_config.json b/checkpoint-692/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-692/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-692/trainer_state.json b/checkpoint-692/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e84eed926c149045abc75d8639f64aced6d5e9cc --- /dev/null +++ b/checkpoint-692/trainer_state.json @@ -0,0 +1,548 @@ +{ + "best_metric": 0.10604555904865265, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-692", + "epoch": 1.3305293516793848, + "eval_steps": 173, + "global_step": 692, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.044843365593907e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-692/training_args.bin b/checkpoint-692/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-692/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/checkpoint-865/README.md b/checkpoint-865/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20c165c7feaa880a18e0662941a5b246a698be2e --- /dev/null +++ b/checkpoint-865/README.md @@ -0,0 +1,209 @@ +--- +base_model: Salesforce/Llama-xLAM-2-8b-fc-r +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-865/adapter_config.json b/checkpoint-865/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e6fe7efe54ad6e008096a354e97649fac2cc73 --- /dev/null +++ b/checkpoint-865/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-865/adapter_model.safetensors b/checkpoint-865/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80c07fdf7b5e3a933595d5638caf892de7777b58 --- /dev/null +++ b/checkpoint-865/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea36bf9e348dde8b0958de872d83cac3e2368a72387fcb44125ed42c6b79eef6 +size 54560368 diff --git a/checkpoint-865/optimizer.pt b/checkpoint-865/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8f8b0d064c67a0fd7a448eceb3a55bd53fcb34f --- /dev/null +++ b/checkpoint-865/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30ca14f2e73107983e59cb62174ac72c2d297cf3d57ae8eab14657ee2ddeb523 +size 109267450 diff --git a/checkpoint-865/rng_state.pth b/checkpoint-865/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d60e23c065eb8fbac57c89520a287dcbf6a76cd9 --- /dev/null +++ b/checkpoint-865/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a6140b27245b25c20a52a3fe73f20b1e186add0b1fdce4a10fefbee9338986 +size 14244 diff --git a/checkpoint-865/scheduler.pt b/checkpoint-865/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fbf6bd57b22ee0eceb230f4fe2bdb382b5a22c0 --- /dev/null +++ b/checkpoint-865/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c0b1c3bebaa8d925447d0a6885e10699bd7815eb0b94fd9d8dbbaacec28ca0 +size 1064 diff --git a/checkpoint-865/special_tokens_map.json b/checkpoint-865/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/checkpoint-865/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-865/tokenizer.json b/checkpoint-865/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-865/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-865/tokenizer_config.json b/checkpoint-865/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/checkpoint-865/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-865/trainer_state.json b/checkpoint-865/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7e26ad881b45404300d021f20f3bdaf5a6fca8ae --- /dev/null +++ b/checkpoint-865/trainer_state.json @@ -0,0 +1,675 @@ +{ + "best_metric": 0.10293085128068924, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-865", + "epoch": 1.663161689599231, + "eval_steps": 173, + "global_step": 865, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + }, + { + "epoch": 1.3459111938953314, + "grad_norm": 0.5583875775337219, + "learning_rate": 1.5619915026250646e-05, + "loss": 0.1141, + "step": 700 + }, + { + "epoch": 1.3651384966652647, + "grad_norm": 0.5790243148803711, + "learning_rate": 1.536264051815491e-05, + "loss": 0.1326, + "step": 710 + }, + { + "epoch": 1.3843657994351979, + "grad_norm": 0.7467628121376038, + "learning_rate": 1.5104079675644706e-05, + "loss": 0.1439, + "step": 720 + }, + { + "epoch": 1.4035931022051313, + "grad_norm": 0.9867657423019409, + "learning_rate": 1.4844348683657616e-05, + "loss": 0.1385, + "step": 730 + }, + { + "epoch": 1.4228204049750646, + "grad_norm": 0.7909297347068787, + "learning_rate": 1.4583564252940735e-05, + "loss": 0.1259, + "step": 740 + }, + { + "epoch": 1.4420477077449978, + "grad_norm": 0.6159791350364685, + "learning_rate": 1.432184356760637e-05, + "loss": 0.1126, + "step": 750 + }, + { + "epoch": 1.4612750105149312, + "grad_norm": 0.6234619617462158, + "learning_rate": 1.4059304232475098e-05, + "loss": 0.1144, + "step": 760 + }, + { + "epoch": 1.4805023132848645, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.3796064220229765e-05, + "loss": 0.1249, + "step": 770 + }, + { + "epoch": 1.4997296160547977, + "grad_norm": 0.6258341073989868, + "learning_rate": 1.3532241818404156e-05, + "loss": 0.1321, + "step": 780 + }, + { + "epoch": 1.5189569188247312, + "grad_norm": 0.5723307728767395, + "learning_rate": 1.326795557623022e-05, + "loss": 0.1193, + "step": 790 + }, + { + "epoch": 1.5381842215946644, + "grad_norm": 0.7454131841659546, + "learning_rate": 1.300332425136769e-05, + "loss": 0.1281, + "step": 800 + }, + { + "epoch": 1.5574115243645976, + "grad_norm": 0.5975070595741272, + "learning_rate": 1.273846675654003e-05, + "loss": 0.1321, + "step": 810 + }, + { + "epoch": 1.576638827134531, + "grad_norm": 0.7056507468223572, + "learning_rate": 1.2473502106100723e-05, + "loss": 0.1444, + "step": 820 + }, + { + "epoch": 1.5958661299044643, + "grad_norm": 0.7889280915260315, + "learning_rate": 1.2208549362553885e-05, + "loss": 0.1226, + "step": 830 + }, + { + "epoch": 1.6150934326743975, + "grad_norm": 0.7041313648223877, + "learning_rate": 1.194372758305325e-05, + "loss": 0.1316, + "step": 840 + }, + { + "epoch": 1.634320735444331, + "grad_norm": 0.7797935605049133, + "learning_rate": 1.1679155765903524e-05, + "loss": 0.132, + "step": 850 + }, + { + "epoch": 1.6535480382142642, + "grad_norm": 0.6426231861114502, + "learning_rate": 1.1414952797088248e-05, + "loss": 0.1101, + "step": 860 + }, + { + "epoch": 1.663161689599231, + "eval_loss": 0.10293085128068924, + "eval_runtime": 203.1567, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 5.277, + "step": 865 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.561082316609946e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-865/training_args.bin b/checkpoint-865/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/checkpoint-865/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/final/config.json b/final/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e31d2636403bbac2bc2c61346a86b5adf3b07145 --- /dev/null +++ b/final/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/final/generation_config.json b/final/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aba45c8e2bd7d66ff3a5e250c9e9233e137a752c --- /dev/null +++ b/final/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.46.3" +} diff --git a/final/model.safetensors.index.json b/final/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..a5349a46ed7d9079ac64dfbb750f494362a71f4e --- /dev/null +++ b/final/model.safetensors.index.json @@ -0,0 +1,554 @@ +{ + "metadata": { + "total_size": 16115048448 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/final/special_tokens_map.json b/final/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/final/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/final/tokenizer.json b/final/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/final/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/final/tokenizer_config.json b/final/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/final/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/plot_loss_from_trainer_state.py b/plot_loss_from_trainer_state.py new file mode 100644 index 0000000000000000000000000000000000000000..eaf442644d02d8625f8186aa6d050495712cd2f9 --- /dev/null +++ b/plot_loss_from_trainer_state.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +""" +Usage: + python plot_loss_from_trainer_state.py --input trainer_state.json --outdir ./plots \ + --checkpoint_steps 263,526,789,1052 + +功能: +- Curve: 黃橘色實線 +- Grid: x,y 虛線 +- Epoch markers: 藍色虛線 + EpochN 標籤(含最後一個 epoch) +- Checkpoints: 藍色小圓點(線性插值;超出範圍時使用端點值,並自動擴張 x 軸確保能看見) +""" +import json, argparse +from pathlib import Path +import matplotlib.pyplot as plt +import numpy as np + +YELLOW_ORANGE = "#d58f00" +BLUE = "#1f77b4" + +def find_epoch_boundaries(log_items): + """找到每個 epoch 邊界 (包含最後一個)""" + boundaries = [] + prev_epoch_int = None + seen = set() + last_step, last_epoch = None, None + for it in log_items: + step = it.get("step") + ep = it.get("epoch") + if step is None or ep is None: + continue + last_step, last_epoch = step, ep + ep_int = int(ep) + if prev_epoch_int is None: + prev_epoch_int = ep_int + continue + if ep_int != prev_epoch_int: + if (step, ep_int) not in seen and ep_int >= 1: + boundaries.append((step, ep_int)) + seen.add((step, ep_int)) + prev_epoch_int = ep_int + # 最後一個 epoch 也補上 + if last_step is not None and last_epoch is not None: + ep_final = int(float(last_epoch)) + 1 + if (last_step, ep_final) not in seen: + boundaries.append((last_step, ep_final)) + boundaries.sort(key=lambda x: x[0]) + return boundaries + +def plot_series(x, y, xlabel, ylabel, title, outpath, + epoch_marks=None, checkpoint_steps=None, + color=YELLOW_ORANGE, linestyle='-'): + fig = plt.figure(figsize=(10,6)) + ax = fig.add_subplot(111) + ax.plot(x, y, color=color, linestyle=linestyle, linewidth=2) + + # 標記 checkpoint 藍點(線性插值;邊界外使用端點值) + extra_x = [] + if checkpoint_steps: + for s in checkpoint_steps: + y_interp = np.interp(s, x, y, left=y[0], right=y[-1]) + ax.plot(s, y_interp, marker='o', color=BLUE, markersize=6) + extra_x.append(s) + + # === 計算 x 範圍時把 epoch 標線也納入,並加右側 padding === + xmin = 0 + all_x_candidates = [max(x)] + if extra_x: + all_x_candidates.append(max(extra_x)) + if epoch_marks: + # 把所有 epoch 標線的 step 納入考量 + ep_steps = [s for (s, _) in epoch_marks] + if ep_steps: + all_x_candidates.append(max(ep_steps)) + + xmax_base = max(all_x_candidates) if all_x_candidates else x[-1] + + # 右邊加一點 margin,避免剛好貼齊看不到線 + span = max(xmax_base - xmin, 1.0) + right_pad = max(1.0, 0.02 * span) # 至少 +1 step 或 2% 寬度 + ax.set_xlim(left=xmin, right=xmax_base + right_pad) + + # y 仍從 0 起 + ax.set_ylim(bottom=0) + + # 虛線格線 + ax.grid(True, which='major', axis='both', linestyle='--', linewidth=0.8, alpha=0.6) + + # epoch 標記 (藍色虛線) + if epoch_marks: + for step, ep in epoch_marks: + ax.axvline(x=step, color=BLUE, linestyle='--', linewidth=1.2) + ymax = ax.get_ylim()[1] + ax.text(step, ymax*0.98, f'Epoch{ep}', rotation=90, + va='top', ha='right', fontsize=8, color=BLUE) + + # label & look(放到最後避免被 set_xlim/set_ylim 影響) + ax.set_xlabel(xlabel); ax.set_ylabel(ylabel); ax.set_title(title) + ax.spines['left'].set_linewidth(2); ax.spines['bottom'].set_linewidth(2) + ax.spines['right'].set_visible(False); ax.spines['top'].set_visible(False) + + fig.savefig(outpath, bbox_inches="tight") + plt.close(fig) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--input", required=True, help="Path to trainer_state.json") + ap.add_argument("--outdir", default="./plots", help="Directory to save PNGs") + ap.add_argument("--no_epoch_marks", action="store_true", help="Disable vertical epoch markers") + ap.add_argument("--checkpoint_steps", default="", help="Comma-separated steps (e.g., 100,200,500)") + args = ap.parse_args() + + src = Path(args.input) + with open(src, "r", encoding="utf-8") as f: + state = json.load(f) + + log = state.get("log_history", state.get("logs", [])) + + steps, train_losses = [], [] + eval_steps, eval_losses = [], [] + lr_steps, lrs = [], [] + + for item in log: + step = item.get("step") + if step is None: + continue + if "loss" in item: + steps.append(step); train_losses.append(item["loss"]) + if "eval_loss" in item: + eval_steps.append(step); eval_losses.append(item["eval_loss"]) + if "learning_rate" in item: + lr_steps.append(step); lrs.append(item["learning_rate"]) + + outdir = Path(args.outdir); outdir.mkdir(parents=True, exist_ok=True) + + epoch_marks = None if args.no_epoch_marks else find_epoch_boundaries(log) + # 允許空白與混合格式 + raw = [s.strip() for s in args.checkpoint_steps.replace(",", ",").split(",") if s.strip()] + checkpoint_steps = [] + for s in raw: + try: + checkpoint_steps.append(int(float(s))) + except: + pass + + if steps and train_losses: + plot_series(steps, train_losses, "Step", "Training Loss", "Training Loss vs Step", + outdir / "loss_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + if eval_steps and eval_losses: + plot_series(eval_steps, eval_losses, "Step", "Eval Loss", "Eval Loss vs Step", + outdir / "eval_loss_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + if lr_steps and lrs: + plot_series(lr_steps, lrs, "Step", "Learning Rate", "Learning Rate vs Step", + outdir / "lr_curve.png", epoch_marks=epoch_marks, checkpoint_steps=checkpoint_steps) + + print(f"Saved plots to: {outdir.resolve()}") + +if __name__ == "__main__": + main() diff --git a/plots/.ipynb_checkpoints/eval_loss_curve-checkpoint.png b/plots/.ipynb_checkpoints/eval_loss_curve-checkpoint.png new file mode 100644 index 0000000000000000000000000000000000000000..56333b426085bb77fe11217d62868df1cd06fb04 Binary files /dev/null and b/plots/.ipynb_checkpoints/eval_loss_curve-checkpoint.png differ diff --git a/plots/.ipynb_checkpoints/loss_curve-checkpoint.png b/plots/.ipynb_checkpoints/loss_curve-checkpoint.png new file mode 100644 index 0000000000000000000000000000000000000000..2c7f6a8190a00b217dc43fad8497029739307a5f Binary files /dev/null and b/plots/.ipynb_checkpoints/loss_curve-checkpoint.png differ diff --git a/plots/.ipynb_checkpoints/lr_curve-checkpoint.png b/plots/.ipynb_checkpoints/lr_curve-checkpoint.png new file mode 100644 index 0000000000000000000000000000000000000000..b340770291a8b6c2b9921a931895cd17c7b2d658 Binary files /dev/null and b/plots/.ipynb_checkpoints/lr_curve-checkpoint.png differ diff --git a/plots/eval_loss_curve.png b/plots/eval_loss_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..56333b426085bb77fe11217d62868df1cd06fb04 Binary files /dev/null and b/plots/eval_loss_curve.png differ diff --git a/plots/loss_curve.png b/plots/loss_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..2c7f6a8190a00b217dc43fad8497029739307a5f Binary files /dev/null and b/plots/loss_curve.png differ diff --git a/plots/lr_curve.png b/plots/lr_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..b340770291a8b6c2b9921a931895cd17c7b2d658 Binary files /dev/null and b/plots/lr_curve.png differ diff --git a/runs/Oct10_11-06-08_pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx/events.out.tfevents.1760094377.pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx.726715.0 b/runs/Oct10_11-06-08_pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx/events.out.tfevents.1760094377.pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx.726715.0 new file mode 100644 index 0000000000000000000000000000000000000000..775724f13cd2a9ea846f188155c315e739fea7ab --- /dev/null +++ b/runs/Oct10_11-06-08_pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx/events.out.tfevents.1760094377.pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx.726715.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990d6bd3215322c3946b02a2698375c3825444a14c972230586ad85ee72cf43e +size 7407 diff --git a/runs/Oct10_17-15-33_pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx/events.out.tfevents.1760116535.pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx.811615.0 b/runs/Oct10_17-15-33_pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx/events.out.tfevents.1760116535.pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx.811615.0 new file mode 100644 index 0000000000000000000000000000000000000000..2e22822d25d8f455676fee6551063ebcb945ac4e --- /dev/null +++ b/runs/Oct10_17-15-33_pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx/events.out.tfevents.1760116535.pytorch-deployment-1975165405393633281-74f4c44dd4-7bsbx.811615.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5adbe45368f0fd5cc445b1b15656932115f8b0fa42448b211e4a9e29d826917d +size 41407 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c69f55fd7bb10288d16d4c76497e21e25adfe4f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3fc3b2c234aa8b39e8ee01e3ee9e6fe54a2a1f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8de32b408b3f406a67290af8d644b0366a3d5e0e --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1206 @@ +{ + "best_metric": 0.09926149994134903, + "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-1384", + "epoch": 2.9994592321095954, + "eval_steps": 173, + "global_step": 1560, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.019227302769933306, + "grad_norm": 0.8050442337989807, + "learning_rate": 3.205128205128205e-06, + "loss": 0.6642, + "step": 10 + }, + { + "epoch": 0.03845460553986661, + "grad_norm": 0.45196670293807983, + "learning_rate": 6.41025641025641e-06, + "loss": 0.6377, + "step": 20 + }, + { + "epoch": 0.05768190830979992, + "grad_norm": 0.47538116574287415, + "learning_rate": 9.615384615384616e-06, + "loss": 0.6205, + "step": 30 + }, + { + "epoch": 0.07690921107973323, + "grad_norm": 0.34218236804008484, + "learning_rate": 1.282051282051282e-05, + "loss": 0.5579, + "step": 40 + }, + { + "epoch": 0.09613651384966652, + "grad_norm": 0.38529354333877563, + "learning_rate": 1.602564102564103e-05, + "loss": 0.4393, + "step": 50 + }, + { + "epoch": 0.11536381661959984, + "grad_norm": 0.3676348924636841, + "learning_rate": 1.923076923076923e-05, + "loss": 0.3965, + "step": 60 + }, + { + "epoch": 0.13459111938953314, + "grad_norm": 0.3427989184856415, + "learning_rate": 2.2435897435897437e-05, + "loss": 0.3488, + "step": 70 + }, + { + "epoch": 0.15381842215946645, + "grad_norm": 0.30555886030197144, + "learning_rate": 2.4999887657859027e-05, + "loss": 0.2224, + "step": 80 + }, + { + "epoch": 0.17304572492939974, + "grad_norm": 0.3501119315624237, + "learning_rate": 2.4995955894949523e-05, + "loss": 0.248, + "step": 90 + }, + { + "epoch": 0.19227302769933305, + "grad_norm": 0.36164987087249756, + "learning_rate": 2.4986409044149163e-05, + "loss": 0.2322, + "step": 100 + }, + { + "epoch": 0.21150033046926636, + "grad_norm": 0.3375028967857361, + "learning_rate": 2.4971251395358342e-05, + "loss": 0.2427, + "step": 110 + }, + { + "epoch": 0.23072763323919968, + "grad_norm": 0.3321882486343384, + "learning_rate": 2.495048975970308e-05, + "loss": 0.1967, + "step": 120 + }, + { + "epoch": 0.24995493600913296, + "grad_norm": 0.2828320562839508, + "learning_rate": 2.492413346647437e-05, + "loss": 0.1636, + "step": 130 + }, + { + "epoch": 0.2691822387790663, + "grad_norm": 0.3430372476577759, + "learning_rate": 2.4892194358936095e-05, + "loss": 0.2041, + "step": 140 + }, + { + "epoch": 0.28840954154899956, + "grad_norm": 0.3393559455871582, + "learning_rate": 2.4854686789003173e-05, + "loss": 0.1911, + "step": 150 + }, + { + "epoch": 0.3076368443189329, + "grad_norm": 0.36908936500549316, + "learning_rate": 2.4811627610792543e-05, + "loss": 0.2025, + "step": 160 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 0.38679710030555725, + "learning_rate": 2.4763036173049677e-05, + "loss": 0.1566, + "step": 170 + }, + { + "epoch": 0.3326323379198462, + "eval_loss": 0.14381718635559082, + "eval_runtime": 202.8561, + "eval_samples_per_second": 5.285, + "eval_steps_per_second": 5.285, + "step": 173 + }, + { + "epoch": 0.34609144985879947, + "grad_norm": 0.39648687839508057, + "learning_rate": 2.4708934310454207e-05, + "loss": 0.1943, + "step": 180 + }, + { + "epoch": 0.3653187526287328, + "grad_norm": 0.3979399800300598, + "learning_rate": 2.4649346333808458e-05, + "loss": 0.1594, + "step": 190 + }, + { + "epoch": 0.3845460553986661, + "grad_norm": 0.34854283928871155, + "learning_rate": 2.458429901911331e-05, + "loss": 0.1683, + "step": 200 + }, + { + "epoch": 0.40377335816859944, + "grad_norm": 0.26675811409950256, + "learning_rate": 2.4513821595536356e-05, + "loss": 0.1616, + "step": 210 + }, + { + "epoch": 0.4230006609385327, + "grad_norm": 0.4399104118347168, + "learning_rate": 2.44379457322777e-05, + "loss": 0.1664, + "step": 220 + }, + { + "epoch": 0.442227963708466, + "grad_norm": 0.5316939353942871, + "learning_rate": 2.4356705524339317e-05, + "loss": 0.1745, + "step": 230 + }, + { + "epoch": 0.46145526647839935, + "grad_norm": 0.5996547341346741, + "learning_rate": 2.4270137477204408e-05, + "loss": 0.1753, + "step": 240 + }, + { + "epoch": 0.48068256924833264, + "grad_norm": 0.4330001175403595, + "learning_rate": 2.417828049043353e-05, + "loss": 0.1997, + "step": 250 + }, + { + "epoch": 0.4999098720182659, + "grad_norm": 0.4255751073360443, + "learning_rate": 2.4081175840185022e-05, + "loss": 0.1728, + "step": 260 + }, + { + "epoch": 0.5191371747881992, + "grad_norm": 0.536382257938385, + "learning_rate": 2.3978867160667457e-05, + "loss": 0.147, + "step": 270 + }, + { + "epoch": 0.5383644775581325, + "grad_norm": 0.5623698830604553, + "learning_rate": 2.3871400424532493e-05, + "loss": 0.1863, + "step": 280 + }, + { + "epoch": 0.5575917803280659, + "grad_norm": 0.49679550528526306, + "learning_rate": 2.375882392221695e-05, + "loss": 0.1685, + "step": 290 + }, + { + "epoch": 0.5768190830979991, + "grad_norm": 0.5784851908683777, + "learning_rate": 2.36411882402434e-05, + "loss": 0.1506, + "step": 300 + }, + { + "epoch": 0.5960463858679325, + "grad_norm": 0.6098183393478394, + "learning_rate": 2.3518546238489e-05, + "loss": 0.1565, + "step": 310 + }, + { + "epoch": 0.6152736886378658, + "grad_norm": 0.5198598504066467, + "learning_rate": 2.339095302643273e-05, + "loss": 0.1433, + "step": 320 + }, + { + "epoch": 0.634500991407799, + "grad_norm": 0.5796005129814148, + "learning_rate": 2.325846593839188e-05, + "loss": 0.1668, + "step": 330 + }, + { + "epoch": 0.6537282941777324, + "grad_norm": 0.6006646752357483, + "learning_rate": 2.312114450775869e-05, + "loss": 0.1505, + "step": 340 + }, + { + "epoch": 0.6652646758396924, + "eval_loss": 0.1198095753788948, + "eval_runtime": 202.8938, + "eval_samples_per_second": 5.284, + "eval_steps_per_second": 5.284, + "step": 346 + }, + { + "epoch": 0.6729555969476657, + "grad_norm": 0.5787773728370667, + "learning_rate": 2.2979050440248896e-05, + "loss": 0.1442, + "step": 350 + }, + { + "epoch": 0.6921828997175989, + "grad_norm": 0.5230283141136169, + "learning_rate": 2.2832247586174118e-05, + "loss": 0.1555, + "step": 360 + }, + { + "epoch": 0.7114102024875323, + "grad_norm": 0.5551069378852844, + "learning_rate": 2.2680801911750558e-05, + "loss": 0.1422, + "step": 370 + }, + { + "epoch": 0.7306375052574656, + "grad_norm": 0.5769614577293396, + "learning_rate": 2.2524781469456928e-05, + "loss": 0.165, + "step": 380 + }, + { + "epoch": 0.7498648080273989, + "grad_norm": 0.6609200239181519, + "learning_rate": 2.2364256367454922e-05, + "loss": 0.161, + "step": 390 + }, + { + "epoch": 0.7690921107973322, + "grad_norm": 0.5530131459236145, + "learning_rate": 2.2199298738085907e-05, + "loss": 0.1709, + "step": 400 + }, + { + "epoch": 0.7883194135672655, + "grad_norm": 0.7019795775413513, + "learning_rate": 2.2029982705458107e-05, + "loss": 0.1471, + "step": 410 + }, + { + "epoch": 0.8075467163371989, + "grad_norm": 0.5327528715133667, + "learning_rate": 2.1856384352138765e-05, + "loss": 0.1913, + "step": 420 + }, + { + "epoch": 0.8267740191071321, + "grad_norm": 0.5548112988471985, + "learning_rate": 2.1678581684966235e-05, + "loss": 0.1509, + "step": 430 + }, + { + "epoch": 0.8460013218770654, + "grad_norm": 0.51619553565979, + "learning_rate": 2.149665459999743e-05, + "loss": 0.1341, + "step": 440 + }, + { + "epoch": 0.8652286246469988, + "grad_norm": 0.6642457842826843, + "learning_rate": 2.1310684846606346e-05, + "loss": 0.1458, + "step": 450 + }, + { + "epoch": 0.884455927416932, + "grad_norm": 0.48370271921157837, + "learning_rate": 2.1120755990749762e-05, + "loss": 0.1584, + "step": 460 + }, + { + "epoch": 0.9036832301868654, + "grad_norm": 0.8130201697349548, + "learning_rate": 2.092695337741671e-05, + "loss": 0.1389, + "step": 470 + }, + { + "epoch": 0.9229105329567987, + "grad_norm": 0.4986889958381653, + "learning_rate": 2.0729364092278456e-05, + "loss": 0.1263, + "step": 480 + }, + { + "epoch": 0.9421378357267319, + "grad_norm": 0.6791219711303711, + "learning_rate": 2.052807692255638e-05, + "loss": 0.1562, + "step": 490 + }, + { + "epoch": 0.9613651384966653, + "grad_norm": 0.6069239974021912, + "learning_rate": 2.0323182317125198e-05, + "loss": 0.1296, + "step": 500 + }, + { + "epoch": 0.9805924412665986, + "grad_norm": 0.6993957161903381, + "learning_rate": 2.011477234586957e-05, + "loss": 0.1695, + "step": 510 + }, + { + "epoch": 0.9978970137595385, + "eval_loss": 0.11108512431383133, + "eval_runtime": 202.9151, + "eval_samples_per_second": 5.283, + "eval_steps_per_second": 5.283, + "step": 519 + }, + { + "epoch": 0.9998197440365318, + "grad_norm": 0.5495030283927917, + "learning_rate": 1.9902940658312253e-05, + "loss": 0.1512, + "step": 520 + }, + { + "epoch": 1.0190470468064652, + "grad_norm": 0.5100754499435425, + "learning_rate": 1.968778244153246e-05, + "loss": 0.1088, + "step": 530 + }, + { + "epoch": 1.0382743495763984, + "grad_norm": 0.6836853623390198, + "learning_rate": 1.9469394377393335e-05, + "loss": 0.1524, + "step": 540 + }, + { + "epoch": 1.0575016523463319, + "grad_norm": 0.5304776430130005, + "learning_rate": 1.9247874599097714e-05, + "loss": 0.1239, + "step": 550 + }, + { + "epoch": 1.076728955116265, + "grad_norm": 0.6995298862457275, + "learning_rate": 1.9023322647091736e-05, + "loss": 0.1203, + "step": 560 + }, + { + "epoch": 1.0959562578861983, + "grad_norm": 0.579207181930542, + "learning_rate": 1.8795839424336097e-05, + "loss": 0.134, + "step": 570 + }, + { + "epoch": 1.1151835606561318, + "grad_norm": 0.4746134877204895, + "learning_rate": 1.8565527150965077e-05, + "loss": 0.1344, + "step": 580 + }, + { + "epoch": 1.134410863426065, + "grad_norm": 0.8127744793891907, + "learning_rate": 1.8332489318353655e-05, + "loss": 0.1157, + "step": 590 + }, + { + "epoch": 1.1536381661959982, + "grad_norm": 0.6949151158332825, + "learning_rate": 1.809683064261343e-05, + "loss": 0.1197, + "step": 600 + }, + { + "epoch": 1.1728654689659317, + "grad_norm": 0.6869731545448303, + "learning_rate": 1.7858657017538178e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 1.192092771735865, + "grad_norm": 0.7461158037185669, + "learning_rate": 1.7618075467020213e-05, + "loss": 0.1262, + "step": 620 + }, + { + "epoch": 1.2113200745057981, + "grad_norm": 0.5442166924476624, + "learning_rate": 1.7375194096958946e-05, + "loss": 0.1258, + "step": 630 + }, + { + "epoch": 1.2305473772757316, + "grad_norm": 0.7670741081237793, + "learning_rate": 1.713012204668325e-05, + "loss": 0.1204, + "step": 640 + }, + { + "epoch": 1.2497746800456648, + "grad_norm": 0.3919640779495239, + "learning_rate": 1.6882969439909434e-05, + "loss": 0.1444, + "step": 650 + }, + { + "epoch": 1.269001982815598, + "grad_norm": 0.6234434247016907, + "learning_rate": 1.663384733525686e-05, + "loss": 0.1245, + "step": 660 + }, + { + "epoch": 1.2882292855855315, + "grad_norm": 0.7237009406089783, + "learning_rate": 1.638286767634353e-05, + "loss": 0.1258, + "step": 670 + }, + { + "epoch": 1.3074565883554647, + "grad_norm": 0.6398624181747437, + "learning_rate": 1.613014324148392e-05, + "loss": 0.1519, + "step": 680 + }, + { + "epoch": 1.326683891125398, + "grad_norm": 0.7676591873168945, + "learning_rate": 1.5875787593011784e-05, + "loss": 0.1545, + "step": 690 + }, + { + "epoch": 1.3305293516793848, + "eval_loss": 0.10604555904865265, + "eval_runtime": 203.0173, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 692 + }, + { + "epoch": 1.3459111938953314, + "grad_norm": 0.5583875775337219, + "learning_rate": 1.5619915026250646e-05, + "loss": 0.1141, + "step": 700 + }, + { + "epoch": 1.3651384966652647, + "grad_norm": 0.5790243148803711, + "learning_rate": 1.536264051815491e-05, + "loss": 0.1326, + "step": 710 + }, + { + "epoch": 1.3843657994351979, + "grad_norm": 0.7467628121376038, + "learning_rate": 1.5104079675644706e-05, + "loss": 0.1439, + "step": 720 + }, + { + "epoch": 1.4035931022051313, + "grad_norm": 0.9867657423019409, + "learning_rate": 1.4844348683657616e-05, + "loss": 0.1385, + "step": 730 + }, + { + "epoch": 1.4228204049750646, + "grad_norm": 0.7909297347068787, + "learning_rate": 1.4583564252940735e-05, + "loss": 0.1259, + "step": 740 + }, + { + "epoch": 1.4420477077449978, + "grad_norm": 0.6159791350364685, + "learning_rate": 1.432184356760637e-05, + "loss": 0.1126, + "step": 750 + }, + { + "epoch": 1.4612750105149312, + "grad_norm": 0.6234619617462158, + "learning_rate": 1.4059304232475098e-05, + "loss": 0.1144, + "step": 760 + }, + { + "epoch": 1.4805023132848645, + "grad_norm": 0.7142959833145142, + "learning_rate": 1.3796064220229765e-05, + "loss": 0.1249, + "step": 770 + }, + { + "epoch": 1.4997296160547977, + "grad_norm": 0.6258341073989868, + "learning_rate": 1.3532241818404156e-05, + "loss": 0.1321, + "step": 780 + }, + { + "epoch": 1.5189569188247312, + "grad_norm": 0.5723307728767395, + "learning_rate": 1.326795557623022e-05, + "loss": 0.1193, + "step": 790 + }, + { + "epoch": 1.5381842215946644, + "grad_norm": 0.7454131841659546, + "learning_rate": 1.300332425136769e-05, + "loss": 0.1281, + "step": 800 + }, + { + "epoch": 1.5574115243645976, + "grad_norm": 0.5975070595741272, + "learning_rate": 1.273846675654003e-05, + "loss": 0.1321, + "step": 810 + }, + { + "epoch": 1.576638827134531, + "grad_norm": 0.7056507468223572, + "learning_rate": 1.2473502106100723e-05, + "loss": 0.1444, + "step": 820 + }, + { + "epoch": 1.5958661299044643, + "grad_norm": 0.7889280915260315, + "learning_rate": 1.2208549362553885e-05, + "loss": 0.1226, + "step": 830 + }, + { + "epoch": 1.6150934326743975, + "grad_norm": 0.7041313648223877, + "learning_rate": 1.194372758305325e-05, + "loss": 0.1316, + "step": 840 + }, + { + "epoch": 1.634320735444331, + "grad_norm": 0.7797935605049133, + "learning_rate": 1.1679155765903524e-05, + "loss": 0.132, + "step": 850 + }, + { + "epoch": 1.6535480382142642, + "grad_norm": 0.6426231861114502, + "learning_rate": 1.1414952797088248e-05, + "loss": 0.1101, + "step": 860 + }, + { + "epoch": 1.663161689599231, + "eval_loss": 0.10293085128068924, + "eval_runtime": 203.1567, + "eval_samples_per_second": 5.277, + "eval_steps_per_second": 5.277, + "step": 865 + }, + { + "epoch": 1.6727753409841974, + "grad_norm": 1.0461760759353638, + "learning_rate": 1.1151237396848058e-05, + "loss": 0.128, + "step": 870 + }, + { + "epoch": 1.692002643754131, + "grad_norm": 0.8692240118980408, + "learning_rate": 1.088812806633349e-05, + "loss": 0.1114, + "step": 880 + }, + { + "epoch": 1.7112299465240641, + "grad_norm": 0.5583866238594055, + "learning_rate": 1.0625743034356183e-05, + "loss": 0.1309, + "step": 890 + }, + { + "epoch": 1.7304572492939974, + "grad_norm": 0.5476118922233582, + "learning_rate": 1.0364200204262473e-05, + "loss": 0.1156, + "step": 900 + }, + { + "epoch": 1.7496845520639308, + "grad_norm": 0.8960713148117065, + "learning_rate": 1.0103617100953274e-05, + "loss": 0.1305, + "step": 910 + }, + { + "epoch": 1.768911854833864, + "grad_norm": 0.6927953958511353, + "learning_rate": 9.84411081807393e-06, + "loss": 0.1245, + "step": 920 + }, + { + "epoch": 1.7881391576037973, + "grad_norm": 0.5891989469528198, + "learning_rate": 9.585797965397949e-06, + "loss": 0.1125, + "step": 930 + }, + { + "epoch": 1.8073664603737307, + "grad_norm": 0.8319947123527527, + "learning_rate": 9.328794616428092e-06, + "loss": 0.1462, + "step": 940 + }, + { + "epoch": 1.826593763143664, + "grad_norm": 0.7439499497413635, + "learning_rate": 9.073216256238485e-06, + "loss": 0.1167, + "step": 950 + }, + { + "epoch": 1.8458210659135972, + "grad_norm": 0.7593638896942139, + "learning_rate": 8.8191777295811e-06, + "loss": 0.1356, + "step": 960 + }, + { + "epoch": 1.8650483686835306, + "grad_norm": 0.871376097202301, + "learning_rate": 8.56679318928e-06, + "loss": 0.1173, + "step": 970 + }, + { + "epoch": 1.884275671453464, + "grad_norm": 0.8772872090339661, + "learning_rate": 8.31617604493651e-06, + "loss": 0.1347, + "step": 980 + }, + { + "epoch": 1.903502974223397, + "grad_norm": 0.6309168934822083, + "learning_rate": 8.067438911968305e-06, + "loss": 0.1382, + "step": 990 + }, + { + "epoch": 1.9227302769933305, + "grad_norm": 0.775113046169281, + "learning_rate": 7.820693561005429e-06, + "loss": 0.1368, + "step": 1000 + }, + { + "epoch": 1.941957579763264, + "grad_norm": 0.9096739888191223, + "learning_rate": 7.576050867665876e-06, + "loss": 0.1263, + "step": 1010 + }, + { + "epoch": 1.961184882533197, + "grad_norm": 0.7637848258018494, + "learning_rate": 7.333620762733376e-06, + "loss": 0.1148, + "step": 1020 + }, + { + "epoch": 1.9804121853031305, + "grad_norm": 0.8084997534751892, + "learning_rate": 7.0935121827597245e-06, + "loss": 0.1457, + "step": 1030 + }, + { + "epoch": 1.995794027519077, + "eval_loss": 0.10069960355758667, + "eval_runtime": 203.0573, + "eval_samples_per_second": 5.279, + "eval_steps_per_second": 5.279, + "step": 1038 + }, + { + "epoch": 1.999639488073064, + "grad_norm": 1.0884274244308472, + "learning_rate": 6.855833021113886e-06, + "loss": 0.1641, + "step": 1040 + }, + { + "epoch": 2.018866790842997, + "grad_norm": 0.702237069606781, + "learning_rate": 6.620690079499835e-06, + "loss": 0.1159, + "step": 1050 + }, + { + "epoch": 2.0380940936129304, + "grad_norm": 0.6377178430557251, + "learning_rate": 6.388189019964976e-06, + "loss": 0.1103, + "step": 1060 + }, + { + "epoch": 2.057321396382864, + "grad_norm": 0.8843504786491394, + "learning_rate": 6.158434317420636e-06, + "loss": 0.1178, + "step": 1070 + }, + { + "epoch": 2.076548699152797, + "grad_norm": 0.42746174335479736, + "learning_rate": 5.931529212695996e-06, + "loss": 0.1143, + "step": 1080 + }, + { + "epoch": 2.0957760019227303, + "grad_norm": 0.7449749708175659, + "learning_rate": 5.70757566614661e-06, + "loss": 0.1262, + "step": 1090 + }, + { + "epoch": 2.1150033046926637, + "grad_norm": 0.6538805961608887, + "learning_rate": 5.48667431183824e-06, + "loss": 0.1344, + "step": 1100 + }, + { + "epoch": 2.1342306074625967, + "grad_norm": 0.8034993410110474, + "learning_rate": 5.268924412326709e-06, + "loss": 0.1447, + "step": 1110 + }, + { + "epoch": 2.15345791023253, + "grad_norm": 0.7438477277755737, + "learning_rate": 5.054423814054049e-06, + "loss": 0.1082, + "step": 1120 + }, + { + "epoch": 2.1726852130024636, + "grad_norm": 0.5646623373031616, + "learning_rate": 4.843268903380932e-06, + "loss": 0.1199, + "step": 1130 + }, + { + "epoch": 2.1919125157723967, + "grad_norm": 0.9965047240257263, + "learning_rate": 4.6355545632752575e-06, + "loss": 0.1303, + "step": 1140 + }, + { + "epoch": 2.21113981854233, + "grad_norm": 0.8709131479263306, + "learning_rate": 4.4313741306762495e-06, + "loss": 0.1107, + "step": 1150 + }, + { + "epoch": 2.2303671213122636, + "grad_norm": 0.6653530597686768, + "learning_rate": 4.230819354553279e-06, + "loss": 0.1053, + "step": 1160 + }, + { + "epoch": 2.2495944240821966, + "grad_norm": 0.766173243522644, + "learning_rate": 4.033980354678239e-06, + "loss": 0.1017, + "step": 1170 + }, + { + "epoch": 2.26882172685213, + "grad_norm": 0.5112572312355042, + "learning_rate": 3.840945581130008e-06, + "loss": 0.109, + "step": 1180 + }, + { + "epoch": 2.2880490296220635, + "grad_norm": 0.8744060397148132, + "learning_rate": 3.651801774549213e-06, + "loss": 0.1026, + "step": 1190 + }, + { + "epoch": 2.3072763323919965, + "grad_norm": 0.8215727806091309, + "learning_rate": 3.4666339271610836e-06, + "loss": 0.1058, + "step": 1200 + }, + { + "epoch": 2.32650363516193, + "grad_norm": 0.6597920656204224, + "learning_rate": 3.285525244584017e-06, + "loss": 0.1378, + "step": 1210 + }, + { + "epoch": 2.3284263654389235, + "eval_loss": 0.10013294219970703, + "eval_runtime": 203.5302, + "eval_samples_per_second": 5.267, + "eval_steps_per_second": 5.267, + "step": 1211 + }, + { + "epoch": 2.3457309379318634, + "grad_norm": 0.7206103205680847, + "learning_rate": 3.108557108440914e-06, + "loss": 0.1028, + "step": 1220 + }, + { + "epoch": 2.3649582407017964, + "grad_norm": 0.968497097492218, + "learning_rate": 2.9358090397901634e-06, + "loss": 0.1345, + "step": 1230 + }, + { + "epoch": 2.38418554347173, + "grad_norm": 0.7522798180580139, + "learning_rate": 2.767358663392658e-06, + "loss": 0.1029, + "step": 1240 + }, + { + "epoch": 2.4034128462416633, + "grad_norm": 0.8699542284011841, + "learning_rate": 2.6032816728309166e-06, + "loss": 0.1181, + "step": 1250 + }, + { + "epoch": 2.4226401490115963, + "grad_norm": 0.8779841661453247, + "learning_rate": 2.4436517964960005e-06, + "loss": 0.1028, + "step": 1260 + }, + { + "epoch": 2.4418674517815298, + "grad_norm": 0.6922764182090759, + "learning_rate": 2.2885407644574696e-06, + "loss": 0.1148, + "step": 1270 + }, + { + "epoch": 2.461094754551463, + "grad_norm": 0.7528237700462341, + "learning_rate": 2.1380182762313238e-06, + "loss": 0.1128, + "step": 1280 + }, + { + "epoch": 2.480322057321396, + "grad_norm": 0.8349286913871765, + "learning_rate": 1.992151969460333e-06, + "loss": 0.1027, + "step": 1290 + }, + { + "epoch": 2.4995493600913297, + "grad_norm": 0.8040717244148254, + "learning_rate": 1.8510073895209131e-06, + "loss": 0.1001, + "step": 1300 + }, + { + "epoch": 2.518776662861263, + "grad_norm": 0.8065551519393921, + "learning_rate": 1.7146479600701565e-06, + "loss": 0.1454, + "step": 1310 + }, + { + "epoch": 2.538003965631196, + "grad_norm": 0.7855721712112427, + "learning_rate": 1.5831349545462461e-06, + "loss": 0.1063, + "step": 1320 + }, + { + "epoch": 2.5572312684011296, + "grad_norm": 0.9087608456611633, + "learning_rate": 1.4565274686351022e-06, + "loss": 0.1155, + "step": 1330 + }, + { + "epoch": 2.576458571171063, + "grad_norm": 0.49701324105262756, + "learning_rate": 1.334882393715585e-06, + "loss": 0.1001, + "step": 1340 + }, + { + "epoch": 2.5956858739409965, + "grad_norm": 0.7943114638328552, + "learning_rate": 1.2182543912952178e-06, + "loss": 0.1107, + "step": 1350 + }, + { + "epoch": 2.6149131767109295, + "grad_norm": 0.8685261607170105, + "learning_rate": 1.1066958684479074e-06, + "loss": 0.1209, + "step": 1360 + }, + { + "epoch": 2.634140479480863, + "grad_norm": 1.0667730569839478, + "learning_rate": 1.0002569542646973e-06, + "loss": 0.1361, + "step": 1370 + }, + { + "epoch": 2.653367782250796, + "grad_norm": 0.6879278421401978, + "learning_rate": 8.989854773281486e-07, + "loss": 0.0925, + "step": 1380 + }, + { + "epoch": 2.6610587033587696, + "eval_loss": 0.09926149994134903, + "eval_runtime": 203.0153, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 1384 + }, + { + "epoch": 2.6725950850207294, + "grad_norm": 0.7204756736755371, + "learning_rate": 8.029269442204348e-07, + "loss": 0.1148, + "step": 1390 + }, + { + "epoch": 2.691822387790663, + "grad_norm": 0.834997832775116, + "learning_rate": 7.121245190748708e-07, + "loss": 0.0918, + "step": 1400 + }, + { + "epoch": 2.7110496905605963, + "grad_norm": 0.8163384795188904, + "learning_rate": 6.266190041799805e-07, + "loss": 0.1345, + "step": 1410 + }, + { + "epoch": 2.7302769933305293, + "grad_norm": 0.6108123660087585, + "learning_rate": 5.464488216449154e-07, + "loss": 0.1235, + "step": 1420 + }, + { + "epoch": 2.7495042961004628, + "grad_norm": 0.8302232027053833, + "learning_rate": 4.716499961343698e-07, + "loss": 0.1163, + "step": 1430 + }, + { + "epoch": 2.7687315988703958, + "grad_norm": 0.670668363571167, + "learning_rate": 4.022561386808177e-07, + "loss": 0.1103, + "step": 1440 + }, + { + "epoch": 2.7879589016403292, + "grad_norm": 0.7220197319984436, + "learning_rate": 3.3829843158131175e-07, + "loss": 0.1228, + "step": 1450 + }, + { + "epoch": 2.8071862044102627, + "grad_norm": 0.5018804669380188, + "learning_rate": 2.798056143856462e-07, + "loss": 0.1225, + "step": 1460 + }, + { + "epoch": 2.826413507180196, + "grad_norm": 0.5343906283378601, + "learning_rate": 2.268039709821687e-07, + "loss": 0.0918, + "step": 1470 + }, + { + "epoch": 2.845640809950129, + "grad_norm": 0.6775656938552856, + "learning_rate": 1.7931731778705052e-07, + "loss": 0.0903, + "step": 1480 + }, + { + "epoch": 2.8648681127200626, + "grad_norm": 0.7841689586639404, + "learning_rate": 1.373669930423288e-07, + "loss": 0.1308, + "step": 1490 + }, + { + "epoch": 2.8840954154899956, + "grad_norm": 0.8570185303688049, + "learning_rate": 1.0097184722750592e-07, + "loss": 0.1287, + "step": 1500 + }, + { + "epoch": 2.903322718259929, + "grad_norm": 0.635200023651123, + "learning_rate": 7.014823458905001e-08, + "loss": 0.1011, + "step": 1510 + }, + { + "epoch": 2.9225500210298625, + "grad_norm": 0.7127873301506042, + "learning_rate": 4.4910005791570786e-08, + "loss": 0.1345, + "step": 1520 + }, + { + "epoch": 2.941777323799796, + "grad_norm": 0.9114808440208435, + "learning_rate": 2.526850169399103e-08, + "loss": 0.1132, + "step": 1530 + }, + { + "epoch": 2.961004626569729, + "grad_norm": 0.7554405927658081, + "learning_rate": 1.1232548253503616e-08, + "loss": 0.1091, + "step": 1540 + }, + { + "epoch": 2.9802319293396624, + "grad_norm": 0.7547165155410767, + "learning_rate": 2.8084525596064337e-09, + "loss": 0.0944, + "step": 1550 + }, + { + "epoch": 2.9936910412786157, + "eval_loss": 0.09935057163238525, + "eval_runtime": 203.0468, + "eval_samples_per_second": 5.28, + "eval_steps_per_second": 5.28, + "step": 1557 + }, + { + "epoch": 2.9994592321095954, + "grad_norm": 0.7488301992416382, + "learning_rate": 0.0, + "loss": 0.1237, + "step": 1560 + }, + { + "epoch": 2.9994592321095954, + "step": 1560, + "total_flos": 1.3623219564340838e+18, + "train_loss": 0.15373969880434182, + "train_runtime": 33993.1903, + "train_samples_per_second": 1.469, + "train_steps_per_second": 0.046 + } + ], + "logging_steps": 10, + "max_steps": 1560, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 173, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.3623219564340838e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c5d5c3761b0cd827095650fdc744b87401c78b --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ce65338e377750e8ed719c3c255445a47816429c580e0d811222e2d91c722b +size 5624 diff --git a/training_meta.json b/training_meta.json new file mode 100644 index 0000000000000000000000000000000000000000..d4f4b3f249b4b0956200245e4fd02147b18ead1f --- /dev/null +++ b/training_meta.json @@ -0,0 +1,25 @@ +{ + "num_samples_train": 16643, + "world_size": 1, + "effective_batch_size": 32, + "steps_per_epoch": 521, + "save_steps": 173, + "saves_per_epoch": 3, + "total_steps_est": 1563, + "approx_ckpts": 10, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "lora_r": 16, + "lora_alpha": 32, + "lora_dropout": 0.05, + "response_template": "<|start_header_id|>assistant<|end_header_id|>", + "use_max_len": 2560, + "label_all_assistant": true, + "skip_tool_only_assistant": false, + "assistant_tag": "<|start_header_id|>assistant<|end_header_id|>", + "tool_use_token": "<|use_tool|>" +} \ No newline at end of file