diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..e965b46857df66b774835eed3f1d30b23692dbd4 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-102/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-135/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-34/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-68/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e935f36466895ff5d9a12ceaea17a40b249ac8f1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,162 @@
+---
+library_name: peft
+tags:
+- axolotl
+- base_model:adapter:model
+- lora
+- transformers
+datasets:
+- hardlyworking/HardlyRPv2-10k
+base_model: model
+pipeline_tag: text-generation
+model-index:
+- name: outputs/out
+ results: []
+---
+
+
+
+[
](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+
+axolotl version: `0.12.0.dev0`
+```yaml
+base_model: model
+
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+plugins:
+ - axolotl.integrations.liger.LigerPlugin
+ - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
+liger_rope: true
+liger_rms_norm: true
+liger_layer_norm: true
+liger_glu_activation: true
+liger_fused_linear_cross_entropy: false
+cut_cross_entropy: true
+
+load_in_8bit: false
+load_in_4bit: true
+
+# for use with fft to only train on language model layers
+# unfrozen_parameters:
+ # - model.language_model.*
+ # - lm_head
+ # - embed_tokens
+
+
+chat_template: mistral_v7_tekken
+datasets:
+ - path: hardlyworking/HardlyRPv2-10k
+ type: chat_template
+ split: train
+ field_messages: conversations
+ message_property_mappings:
+ role: from
+ content: value
+ user: human
+ assistant: gpt
+
+val_set_size: 0.0
+output_dir: ./outputs/out
+
+adapter: qlora
+lora_r: 32
+lora_alpha: 64
+lora_dropout: 0.05
+# lora_target_linear: # Does not work with gemma3n currently
+lora_target_modules:
+ - self_attn.q_proj
+ - self_attn.k_proj
+ - self_attn.v_proj
+ - self_attn.o_proj
+ - mlp.gate_proj
+ - mlp.up_proj
+ - mlp.down_proj
+
+
+sequence_len: 8192
+sample_packing: true
+eval_sample_packing: true
+pad_to_sequence_len: true
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 16
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+bf16: auto
+tf32: true
+
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+ use_reentrant: false
+ unsloth: true
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_ratio: 0.1
+evals_per_epoch:
+saves_per_epoch: 4
+weight_decay: 0.0
+special_tokens:
+
+
+```
+
+</details><br>
+
+# outputs/out
+
+This model was trained from scratch on the hardlyworking/HardlyRPv2-10k dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- gradient_accumulation_steps: 16
+- total_train_batch_size: 16
+- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 13
+- training_steps: 135
+
+### Training results
+
+
+
+### Framework versions
+
+- PEFT 0.17.0
+- Transformers 4.55.0
+- Pytorch 2.7.1+cu126
+- Datasets 4.0.0
+- Tokenizers 0.21.4
\ No newline at end of file
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5e245e01f30f611b91cd60ec41553baebc450083
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,42 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "model",
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 64,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "mlp.up_proj",
+ "self_attn.q_proj",
+ "self_attn.o_proj",
+ "self_attn.k_proj",
+ "mlp.down_proj",
+ "mlp.gate_proj",
+ "self_attn.v_proj"
+ ],
+ "target_parameters": [],
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8a7708f75c6a5bcf64a44bde2ee2caf14f3fce19
--- /dev/null
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:292f11ea917be88d6cdf82d94a01c2fe0cff748f22bfff68556966618cae8920
+size 739321784
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..b97e2a0976ff559a050c814bc1fed5f72351013d
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,51 @@
+{%- set today = strftime_now("%Y-%m-%d") %}
+{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
+
+{{- bos_token }}
+
+{%- if messages[0]['role'] == 'system' %}
+ {%- if messages[0]['content'] is string %}
+ {%- set system_message = messages[0]['content'] %}
+ {%- else %}
+ {%- set system_message = messages[0]['content'][0]['text'] %}
+ {%- endif %}
+ {%- set loop_messages = messages[1:] %}
+{%- else %}
+ {%- set system_message = default_system_message %}
+ {%- set loop_messages = messages %}
+{%- endif %}
+{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
+
+{%- for message in loop_messages %}
+ {%- if message['role'] == 'user' %}
+ {%- if message['content'] is string %}
+ {{- '[INST]' + message['content'] + '[/INST]' }}
+ {%- else %}
+ {{- '[INST]' }}
+ {%- for block in message['content'] %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- elif block['type'] in ['image', 'image_url'] %}
+ {{- '[IMG]' }}
+ {%- else %}
+ {{- raise_exception('Only text and image blocks are supported in message content!') }}
+ {%- endif %}
+ {%- endfor %}
+ {{- '[/INST]' }}
+ {%- endif %}
+ {%- elif message['role'] == 'system' %}
+ {%- if message['content'] is string %}
+ {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
+ {%- else %}
+ {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
+ {%- endif %}
+ {%- elif message['role'] == 'assistant' %}
+ {%- if message['content'] is string %}
+ {{- message['content'] + eos_token }}
+ {%- else %}
+ {{- message['content'][0]['text'] + eos_token }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Only user, system and assistant roles are supported!') }}
+ {%- endif %}
+{%- endfor %}
diff --git a/checkpoint-102/README.md b/checkpoint-102/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4c3b864e27d253da17ecd520c6fa36952bb73c2d
--- /dev/null
+++ b/checkpoint-102/README.md
@@ -0,0 +1,208 @@
+---
+base_model: model
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- axolotl
+- base_model:adapter:model
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.17.0
\ No newline at end of file
diff --git a/checkpoint-102/adapter_config.json b/checkpoint-102/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5e245e01f30f611b91cd60ec41553baebc450083
--- /dev/null
+++ b/checkpoint-102/adapter_config.json
@@ -0,0 +1,42 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "model",
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 64,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "mlp.up_proj",
+ "self_attn.q_proj",
+ "self_attn.o_proj",
+ "self_attn.k_proj",
+ "mlp.down_proj",
+ "mlp.gate_proj",
+ "self_attn.v_proj"
+ ],
+ "target_parameters": [],
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-102/adapter_model.safetensors b/checkpoint-102/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d04f44f707945000225194e45446e5ec870f8dd0
--- /dev/null
+++ b/checkpoint-102/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c66d8f7119e58ec01367fb85a6abaa1eb47c6e119edeb9a8dd1e5b3393444add
+size 739321784
diff --git a/checkpoint-102/chat_template.jinja b/checkpoint-102/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..b97e2a0976ff559a050c814bc1fed5f72351013d
--- /dev/null
+++ b/checkpoint-102/chat_template.jinja
@@ -0,0 +1,51 @@
+{%- set today = strftime_now("%Y-%m-%d") %}
+{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
+
+{{- bos_token }}
+
+{%- if messages[0]['role'] == 'system' %}
+ {%- if messages[0]['content'] is string %}
+ {%- set system_message = messages[0]['content'] %}
+ {%- else %}
+ {%- set system_message = messages[0]['content'][0]['text'] %}
+ {%- endif %}
+ {%- set loop_messages = messages[1:] %}
+{%- else %}
+ {%- set system_message = default_system_message %}
+ {%- set loop_messages = messages %}
+{%- endif %}
+{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
+
+{%- for message in loop_messages %}
+ {%- if message['role'] == 'user' %}
+ {%- if message['content'] is string %}
+ {{- '[INST]' + message['content'] + '[/INST]' }}
+ {%- else %}
+ {{- '[INST]' }}
+ {%- for block in message['content'] %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- elif block['type'] in ['image', 'image_url'] %}
+ {{- '[IMG]' }}
+ {%- else %}
+ {{- raise_exception('Only text and image blocks are supported in message content!') }}
+ {%- endif %}
+ {%- endfor %}
+ {{- '[/INST]' }}
+ {%- endif %}
+ {%- elif message['role'] == 'system' %}
+ {%- if message['content'] is string %}
+ {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
+ {%- else %}
+ {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
+ {%- endif %}
+ {%- elif message['role'] == 'assistant' %}
+ {%- if message['content'] is string %}
+ {{- message['content'] + eos_token }}
+ {%- else %}
+ {{- message['content'][0]['text'] + eos_token }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Only user, system and assistant roles are supported!') }}
+ {%- endif %}
+{%- endfor %}
diff --git a/checkpoint-102/optimizer.pt b/checkpoint-102/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d86b91ddcf2b798b33d25a2503bdd00a3202fd8
--- /dev/null
+++ b/checkpoint-102/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:593d7f5fcf6e4e421762793d533bd3f8719055ce3efd3be1208ec10b91546d78
+size 376056741
diff --git a/checkpoint-102/rng_state.pth b/checkpoint-102/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bf11ea18a37f9700cf5a173b8807fbb3a30ae441
--- /dev/null
+++ b/checkpoint-102/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8290a10374e21d73cfe0938ec8975684ad4f00a2201486ecbb737e779b98b5f6
+size 14645
diff --git a/checkpoint-102/scheduler.pt b/checkpoint-102/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..608d9f1e8f270a648234d70cc965263cd8cf5a62
--- /dev/null
+++ b/checkpoint-102/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32fc9a04baf6c238edd542dfc9e204d9c241397ad5906691d3d1b4f268f110a6
+size 1465
diff --git a/checkpoint-102/special_tokens_map.json b/checkpoint-102/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..a47054b449916f808ab94e6d2375c00ead78cb8f
--- /dev/null
+++ b/checkpoint-102/special_tokens_map.json
@@ -0,0 +1,1032 @@
+{
+ "additional_special_tokens": [
+ "",
+ "",
+ "",
+ "[INST]",
+ "[/INST]",
+ "[AVAILABLE_TOOLS]",
+ "[/AVAILABLE_TOOLS]",
+ "[TOOL_RESULTS]",
+ "[/TOOL_RESULTS]",
+ "[TOOL_CALLS]",
+ "[IMG]",
+ "",
+ "[IMG_BREAK]",
+ "[IMG_END]",
+ "[PREFIX]",
+ "[MIDDLE]",
+ "[SUFFIX]",
+ "[SYSTEM_PROMPT]",
+ "[/SYSTEM_PROMPT]",
+ "[TOOL_CONTENT]",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "