Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- Nanbeige4.1-3B-Report.pdf +3 -0
- README.md +193 -0
- added_tokens.json +9 -0
- chat_template.jinja +137 -0
- config.json +69 -0
- generation_config.json +7 -0
- model.safetensors +3 -0
- recipe.yaml +39 -0
- special_tokens_map.json +33 -0
- tokenizer.json +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +103 -0
- tokenizer_config_search.json +101 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Nanbeige4.1-3B-Report.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
Nanbeige4.1-3B-Report.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dd7255af85a9fa0be57d250f938e126e603bfb573cb0fed3f02671a85caf331
|
| 3 |
+
size 1278078
|
README.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
- zh
|
| 6 |
+
library_name: transformers
|
| 7 |
+
pipeline_tag: text-generation
|
| 8 |
+
tags:
|
| 9 |
+
- llm
|
| 10 |
+
- nanbeige
|
| 11 |
+
base_model:
|
| 12 |
+
- Nanbeige/Nanbeige4-3B
|
| 13 |
+
---
|
| 14 |
+
<div align="center">
|
| 15 |
+
|
| 16 |
+
<img src="figures/nbg.png" width="220" alt="Nanbeige Logo">
|
| 17 |
+
</div>
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Introduction
|
| 22 |
+
|
| 23 |
+
Nanbeige4.1-3B is built upon Nanbeige4-3B-Base and represents an enhanced iteration of our previous reasoning model, Nanbeige4-3B-Thinking-2511, achieved through further post-training optimization with supervised fine-tuning (SFT) and reinforcement learning (RL). As a highly competitive open-source model at a small parameter scale, Nanbeige4.1-3B illustrates that compact models can simultaneously achieve robust **reasoning**, **preference alignment**, and **effective agentic behaviors**.
|
| 24 |
+
|
| 25 |
+
<div align="center">
|
| 26 |
+
|
| 27 |
+
<img src="figures/model_performance_comparison.png">
|
| 28 |
+
</div>
|
| 29 |
+
|
| 30 |
+
Specifically, Nanbeige4.1-3B exhibits the following key strengths:
|
| 31 |
+
|
| 32 |
+
* **Strong Reasoning:** Nanbeige4.1-3B is capable of solving complex, multi-step problems through sustained and coherent reasoning within a single forward pass, and reliably produces correct final answers on challenging tasks such as LiveCodeBench-Pro, IMO-Answer-Bench, and AIME 2026 I.
|
| 33 |
+
* **Robust Preference Alignment:** Nanbeige4.1-3B achieves solid alignment performance, outperforming not only same-scale models such as Qwen3-4B-2507 and Nanbeige4-3B-2511, but also substantially larger models including Qwen3-30B-A3B and Qwen3-32B on Arena-Hard-v2 and Multi-Challenge.
|
| 34 |
+
* **Agentic Capability:** Nanbeige4.1-3B is the first general small model to natively support deep-search tasks and reliably sustain complex problem solving involving more than 500 rounds of tool invocations. It fills a long-standing gap in the small-model ecosystem where models are typically optimized for either general reasoning or agentic scenarios, but rarely excel at both.
|
| 35 |
+
|
| 36 |
+
> **Technical Report:** [Link](https://huggingface.co/Nanbeige/Nanbeige4.1-3B/blob/main/Nanbeige4.1-3B-Report.pdf)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# Performances
|
| 42 |
+
|
| 43 |
+
We evaluate Nanbeige4.1-3B across a broad and diverse set of benchmarks covering **general reasoning**, and **deep-search capabilities**.
|
| 44 |
+
|
| 45 |
+
### General Reasoning Tasks
|
| 46 |
+
|
| 47 |
+
On general reasoning tasks including **code**, **math**, **science**, **alignment**, and **tool-use** benchmarks, Nanbeige4.1-3B not only significantly outperforms same-scale models such as **Qwen3-4B**, but also demonstrates overall superior performance compared to larger models including **Qwen3-30B-A3B-2507** and **Qwen3-32B**.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
| Benchmark | Qwen3-4B-2507 | Qwen3-8B | Qwen3-14B | Qwen3-32B | Qwen3-30B-A3B-2507 | Nanbeige4-3B-2511 | **Nanbeige4.1-3B** |
|
| 51 |
+
| --------------------------- | ------------- | -------- | --------- | --------- | ------------------ | ----------------- | ------------------ |
|
| 52 |
+
| **Code** | | | | | | | |
|
| 53 |
+
| Live-Code-Bench-V6 | 57.4 | 49.4 | 55.9 | 55.7 | <u>66.0</u> | 46.0 | **76.9** |
|
| 54 |
+
| Live-Code-Bench-Pro-Easy | 40.2 | 41.2 | 33.0 | 42.3 | <u>60.8</u> | 40.2 | **81.4** |
|
| 55 |
+
| Live-Code-Bench-Pro-Medium | 5.3 | 3.5 | 1.8 | 3.5 | 3.5 | <u>5.3</u> | **28.1** |
|
| 56 |
+
| **Math** | | | | | | | |
|
| 57 |
+
| AIME 2026 I | 81.46 | 70.42 | 76.46 | 75.83 | <u>87.30</u> | 84.1 | **87.40** |
|
| 58 |
+
| HMMT Nov | 68.33 | 48.33 | 56.67 | 57.08 | <u>71.25</u> | 66.67 | **77.92** |
|
| 59 |
+
| IMO-Answer-Bench | 48.00 | 36.56 | 41.81 | 43.94 | **54.34** | 38.25 | 53.38 |
|
| 60 |
+
| **Science** | | | | | | | |
|
| 61 |
+
| GPQA | 65.8 | 62.0 | 63.38 | 68.4 | 73.4 | <u>82.2</u> | **83.8** |
|
| 62 |
+
| HLE (Text-only) | 6.72 | 5.28 | 7.00 | 9.31 | <u>11.77</u> | 10.98 | **12.60** |
|
| 63 |
+
| **Alignment** | | | | | | | |
|
| 64 |
+
| Arena-Hard-v2 | 34.9 | 26.3 | 36.9 | 56.0 | <u>60.2</u> | 60.0 | **73.2** |
|
| 65 |
+
| Multi-Challenge | 41.14 | 36.30 | 36.97 | 38.72 | <u>49.40</u> | 41.20 | **52.21** |
|
| 66 |
+
| **Tool Use** | | | | | | | |
|
| 67 |
+
| BFCL-V4 | 44.87 | 42.20 | 45.14 | 47.90 | 48.6 | <u>53.8</u> | **56.50** |
|
| 68 |
+
| Tau2-Bench | 45.9 | 42.06 | 44.96 | 45.26 | <u>47.70</u> | 41.77 | **48.57** |
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
### Deep Search Tasks
|
| 73 |
+
|
| 74 |
+
As a general small model, Nanbeige4.1-3B achieves deep-search performance comparable to specialized agents under 10B parameters.
|
| 75 |
+
In contrast to existing small general models, which typically exhibit little to no deep-search capability, Nanbeige4.1-3B represents a substantial qualitative improvement over prior small general models.
|
| 76 |
+
|
| 77 |
+
#### Deep Search and Agent Benchmarks
|
| 78 |
+
| Model | xBench-DeepSearch-2505 | xBench-DeepSearch-2510 | Browse-Comp | Browse-Comp-ZH | GAIA (Text-only) | HLE | SEAL-0 |
|
| 79 |
+
|------|-------------------|-------------------|-------------|----------------|------------------|-----|--------|
|
| 80 |
+
| **Search-Specialized Small Agents** ||||||||
|
| 81 |
+
| MiroThinker-v1.0-8B | 61 | – | 31.1 | 40.2 | 66.4 | 21.5 | 40.4 |
|
| 82 |
+
| AgentCPM-Explore-4B | 70 | – | 25.0 | 29.0 | 63.9 | 19.1 | 40.0 |
|
| 83 |
+
| **Large Foundation Models (with Tools)** ||||||||
|
| 84 |
+
| GLM-4.6-357B | 70 | – | 45.1 | 49.5 | 71.9 | 30.4 | – |
|
| 85 |
+
| Minimax-M2-230B | 72 | – | 44.0 | 48.5 | 75.7 | 31.8 | – |
|
| 86 |
+
| DeepSeek-V3.2-671B | 71 | – | 67.6 | 65.0 | 63.5 | 40.8 | 38.5 |
|
| 87 |
+
| **Small Foundation Models (with Tools)** ||||||||
|
| 88 |
+
| Qwen3-4B-2507 | 34 | 5 | 1.57 | 7.92 | 28.33 | 11.13 | <u>15.74</u> |
|
| 89 |
+
| Qwen3-8B | 31 | 2 | 0.79 | 5.15 | 19.53 | 10.24 | 6.34 |
|
| 90 |
+
| Qwen3-14B | 34 | 9 | 2.36 | 7.11 | 30.23 | 10.17 | 12.64 |
|
| 91 |
+
| Qwen3-32B | <u>39</u> | 8 | <u>3.15</u> | <u>7.34</u> | 30.17 | 9.26 | 8.15 |
|
| 92 |
+
| Qwen3-30B-A3B-2507 | 25 | 10 | 1.57 | 4.12 | <u>31.63</u> | <u>14.81</u> | 9.24 |
|
| 93 |
+
| **Ours (with Tools)** ||||||||
|
| 94 |
+
| Nanbeige4-3B-2511 | 33 | <u>11</u> | 0.79 | 3.09 | 19.42 | 13.89 | 12.61 |
|
| 95 |
+
| **Nanbeige4.1-3B** | **75** | **39** | **19.12** | **31.83** | **69.90** | **22.29** | **41.44** |
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
## <span id="Inference">Quickstart</span>
|
| 99 |
+
|
| 100 |
+
For inference hyperparameters, we recommend the following settings:
|
| 101 |
+
* Temperature: 0.6
|
| 102 |
+
* Top-p: 0.95
|
| 103 |
+
* Repeat penalty: 1.0
|
| 104 |
+
* Max New Tokens: 131072
|
| 105 |
+
|
| 106 |
+
For the chat scenario:
|
| 107 |
+
```
|
| 108 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 109 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
| 110 |
+
'Nanbeige/Nanbeige4.1-3B',
|
| 111 |
+
use_fast=False,
|
| 112 |
+
trust_remote_code=True
|
| 113 |
+
)
|
| 114 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 115 |
+
'Nanbeige/Nanbeige4.1-3B',
|
| 116 |
+
torch_dtype='auto',
|
| 117 |
+
device_map='auto',
|
| 118 |
+
trust_remote_code=True
|
| 119 |
+
)
|
| 120 |
+
messages = [
|
| 121 |
+
{'role': 'user', 'content': 'Which number is bigger, 9.11 or 9.8?'}
|
| 122 |
+
]
|
| 123 |
+
prompt = tokenizer.apply_chat_template(
|
| 124 |
+
messages,
|
| 125 |
+
add_generation_prompt=True,
|
| 126 |
+
tokenize=False
|
| 127 |
+
)
|
| 128 |
+
input_ids = tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
|
| 129 |
+
output_ids = model.generate(input_ids.to('cuda'), eos_token_id=166101)
|
| 130 |
+
resp = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
|
| 131 |
+
print(resp)
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
For the tool use scenario:
|
| 135 |
+
```
|
| 136 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 137 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
| 138 |
+
'Nanbeige/Nanbeige4.1-3B',
|
| 139 |
+
use_fast=False,
|
| 140 |
+
trust_remote_code=True
|
| 141 |
+
)
|
| 142 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 143 |
+
'Nanbeige/Nanbeige4.1-3B',
|
| 144 |
+
torch_dtype='auto',
|
| 145 |
+
device_map='auto',
|
| 146 |
+
trust_remote_code=True
|
| 147 |
+
)
|
| 148 |
+
messages = [
|
| 149 |
+
{'role': 'user', 'content': 'Help me check the weather in Beijing now'}
|
| 150 |
+
]
|
| 151 |
+
tools = [{'type': 'function',
|
| 152 |
+
'function': {'name': 'SearchWeather',
|
| 153 |
+
'description': 'Find out the current weather in a place on a certain day.',
|
| 154 |
+
'parameters': {'type': 'dict',
|
| 155 |
+
'properties': {'location': {'type': 'string',
|
| 156 |
+
'description': 'A city in China.'},
|
| 157 |
+
'required': ['location']}}}}]
|
| 158 |
+
prompt = tokenizer.apply_chat_template(
|
| 159 |
+
messages,
|
| 160 |
+
tools,
|
| 161 |
+
add_generation_prompt=True,
|
| 162 |
+
tokenize=False
|
| 163 |
+
)
|
| 164 |
+
input_ids = tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
|
| 165 |
+
output_ids = model.generate(input_ids.to('cuda'), eos_token_id=166101)
|
| 166 |
+
resp = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
|
| 167 |
+
print(resp)
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
For the deep-search scenario:
|
| 171 |
+
|
| 172 |
+
* Inference Framework: [**miroflow-framework**](https://github.com/MiroMindAI/MiroThinker)
|
| 173 |
+
* Switch tokenizer configuration to **tokenizer_config_search.json**
|
| 174 |
+
* Tools Configuration:
|
| 175 |
+
|
| 176 |
+
| Server | Description | Tools Provided |
|
| 177 |
+
|-----------------------------|-----------------------------------------------------------------------------|-------------------------------------------------------------------------------|
|
| 178 |
+
| tool-python | Execution environment and file management ([E2B sandbox](https://e2b.dev/)) | create_sandbox, run_command, run_python_code, upload_file_from_local_to_sandbox, download_file_from_sandbox_to_local, download_file_from_internet_to_sandbox |
|
| 179 |
+
| search_and_scrape_webpage | Google search via [Serper API](https://google.serper.dev) | google_search |
|
| 180 |
+
| jina_scrape_llm_summary | Web scraping with LLM-based information extraction with [Jina](https://r.jina.ai) | scrape_and_extract_info |
|
| 181 |
+
|
| 182 |
+
* Summary model: Qwen3-14B-thinking
|
| 183 |
+
* Temperature: 1.0
|
| 184 |
+
* Note, access to HuggingFace has been explicitly disabled in these tools.
|
| 185 |
+
|
| 186 |
+
# <span id="Limitations">Limitations</span>
|
| 187 |
+
|
| 188 |
+
While we place great emphasis on the safety of the model during the training process, striving to ensure that its outputs align with ethical and legal requirements, it may not completely avoid generating unexpected outputs due to the model's size and probabilistic nature. These outputs may include harmful content such as bias or discrimination. Please don't propagate such content. We do not assume any responsibility for the consequences resulting from the dissemination of inappropriate information.
|
| 189 |
+
<br>
|
| 190 |
+
|
| 191 |
+
# <span id="Limitations">Contact</span>
|
| 192 |
+
If you have any questions, please raise an issue or contact us at nanbeige@kanzhun.com.
|
| 193 |
+
<br>
|
added_tokens.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 166104,
|
| 3 |
+
"</tool_call>": 166106,
|
| 4 |
+
"<think>": 166103,
|
| 5 |
+
"<tool_call>": 166105,
|
| 6 |
+
"<|endoftext|>": 166102,
|
| 7 |
+
"<|im_end|>": 166101,
|
| 8 |
+
"<|im_start|>": 166100
|
| 9 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
{%- if tools %}
|
| 3 |
+
{{- '<|im_start|>system
|
| 4 |
+
' }}
|
| 5 |
+
{%- if messages[0].role == 'system' %}
|
| 6 |
+
{{- messages[0].content + '
|
| 7 |
+
|
| 8 |
+
' }}
|
| 9 |
+
{%- else %}
|
| 10 |
+
{{- '你是一位工具函数调用专家,你会得到一个问题和一组可能的工具函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的,请尽量尝试探索通过工具解决问题。
|
| 11 |
+
如果没有一个函数可以使用,请直接使用自然语言回复用户。
|
| 12 |
+
如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息。
|
| 13 |
+
如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户。' }}
|
| 14 |
+
{%- endif %}
|
| 15 |
+
{{- "# Tools
|
| 16 |
+
|
| 17 |
+
You may call one or more functions to assist with the user query.
|
| 18 |
+
|
| 19 |
+
You are provided with function signatures within <tools></tools> XML tags:
|
| 20 |
+
<tools>" }}
|
| 21 |
+
{%- for tool in tools %}
|
| 22 |
+
{{- "
|
| 23 |
+
" }}
|
| 24 |
+
{{- tool | tojson }}
|
| 25 |
+
{%- endfor %}
|
| 26 |
+
{{- "
|
| 27 |
+
</tools>
|
| 28 |
+
|
| 29 |
+
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
| 30 |
+
<tool_call>
|
| 31 |
+
{\"name\": <function-name>, \"arguments\": <args-json-object>}
|
| 32 |
+
</tool_call><|im_end|>
|
| 33 |
+
" }}
|
| 34 |
+
{%- else %}
|
| 35 |
+
{%- if messages[0].role == 'system' %}
|
| 36 |
+
{{- '<|im_start|>system
|
| 37 |
+
' + messages[0].content + '<|im_end|>
|
| 38 |
+
' }}
|
| 39 |
+
{%- else %}
|
| 40 |
+
{{- '<|im_start|>system
|
| 41 |
+
你是南北阁,一款由BOSS直聘自主研发并训练的专业大语言模型。<|im_end|>
|
| 42 |
+
' }}
|
| 43 |
+
{%- endif %}
|
| 44 |
+
{%- endif %}
|
| 45 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 46 |
+
{%- for message in messages[::-1] %}
|
| 47 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 48 |
+
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
| 49 |
+
{%- set ns.multi_step_tool = false %}
|
| 50 |
+
{%- set ns.last_query_index = index %}
|
| 51 |
+
{%- endif %}
|
| 52 |
+
{%- endfor %}
|
| 53 |
+
{%- for message in messages %}
|
| 54 |
+
{%- if message.content is string %}
|
| 55 |
+
{%- set content = message.content %}
|
| 56 |
+
{%- else %}
|
| 57 |
+
{%- set content = '' %}
|
| 58 |
+
{%- endif %}
|
| 59 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 60 |
+
{{- '<|im_start|>' + message.role + '
|
| 61 |
+
' + content + '<|im_end|>' + '
|
| 62 |
+
' }}
|
| 63 |
+
{%- elif message.role == "assistant" %}
|
| 64 |
+
{%- set reasoning_content = '' %}
|
| 65 |
+
{%- if message.reasoning_content is string %}
|
| 66 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 67 |
+
{%- else %}
|
| 68 |
+
{%- if '</think>' in content %}
|
| 69 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('
|
| 70 |
+
').split('<think>')[-1].lstrip('
|
| 71 |
+
') %}
|
| 72 |
+
{%- set content = content.split('</think>')[-1].lstrip('
|
| 73 |
+
') %}
|
| 74 |
+
{%- endif %}
|
| 75 |
+
{%- endif %}
|
| 76 |
+
{%- if loop.index0 > ns.last_query_index or keep_all_think or (extra_body is defined and extra_body.keep_all_think) %}
|
| 77 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 78 |
+
{{- '<|im_start|>' + message.role + '
|
| 79 |
+
<think>
|
| 80 |
+
' + reasoning_content.strip('
|
| 81 |
+
') + '
|
| 82 |
+
</think>
|
| 83 |
+
|
| 84 |
+
' + content.lstrip('
|
| 85 |
+
') }}
|
| 86 |
+
{%- else %}
|
| 87 |
+
{{- '<|im_start|>' + message.role + '
|
| 88 |
+
' + content }}
|
| 89 |
+
{%- endif %}
|
| 90 |
+
{%- else %}
|
| 91 |
+
{{- '<|im_start|>' + message.role + '
|
| 92 |
+
' + content }}
|
| 93 |
+
{%- endif %}
|
| 94 |
+
{%- if message.tool_calls %}
|
| 95 |
+
{%- for tool_call in message.tool_calls %}
|
| 96 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 97 |
+
{{- '
|
| 98 |
+
' }}
|
| 99 |
+
{%- endif %}
|
| 100 |
+
{%- if tool_call.function %}
|
| 101 |
+
{%- set tool_call = tool_call.function %}
|
| 102 |
+
{%- endif %}
|
| 103 |
+
{{- '<tool_call>
|
| 104 |
+
{"name": "' }}
|
| 105 |
+
{{- tool_call.name }}
|
| 106 |
+
{{- '", "arguments": ' }}
|
| 107 |
+
{%- if tool_call.arguments is string %}
|
| 108 |
+
{{- tool_call.arguments }}
|
| 109 |
+
{%- else %}
|
| 110 |
+
{{- tool_call.arguments | tojson }}
|
| 111 |
+
{%- endif %}
|
| 112 |
+
{{- '}
|
| 113 |
+
</tool_call>' }}
|
| 114 |
+
{%- endfor %}
|
| 115 |
+
{%- endif %}
|
| 116 |
+
{{- '<|im_end|>
|
| 117 |
+
' }}
|
| 118 |
+
{%- elif message.role == "tool" %}
|
| 119 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 120 |
+
{{- '<|im_start|>user' }}
|
| 121 |
+
{%- endif %}
|
| 122 |
+
{{- '
|
| 123 |
+
<tool_response>
|
| 124 |
+
' }}
|
| 125 |
+
{{- content }}
|
| 126 |
+
{{- '
|
| 127 |
+
</tool_response>' }}
|
| 128 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 129 |
+
{{- '<|im_end|>
|
| 130 |
+
' }}
|
| 131 |
+
{%- endif %}
|
| 132 |
+
{%- endif %}
|
| 133 |
+
{%- endfor %}
|
| 134 |
+
{%- if add_generation_prompt %}
|
| 135 |
+
{{- '<|im_start|>assistant
|
| 136 |
+
' }}
|
| 137 |
+
{%- endif %}
|
config.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 166100,
|
| 8 |
+
"dtype": "bfloat16",
|
| 9 |
+
"embd_pdrop": 0.0,
|
| 10 |
+
"eos_token_id": 166101,
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2560,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 10496,
|
| 16 |
+
"max_position_embeddings": 262144,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 20,
|
| 20 |
+
"num_hidden_layers": 32,
|
| 21 |
+
"num_key_value_heads": 4,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"pretraining_tp": 1,
|
| 24 |
+
"quantization_config": {
|
| 25 |
+
"config_groups": {
|
| 26 |
+
"group_0": {
|
| 27 |
+
"format": "pack-quantized",
|
| 28 |
+
"input_activations": null,
|
| 29 |
+
"output_activations": null,
|
| 30 |
+
"targets": [
|
| 31 |
+
"Linear"
|
| 32 |
+
],
|
| 33 |
+
"weights": {
|
| 34 |
+
"actorder": null,
|
| 35 |
+
"block_structure": null,
|
| 36 |
+
"dynamic": false,
|
| 37 |
+
"group_size": 32,
|
| 38 |
+
"num_bits": 8,
|
| 39 |
+
"observer": "mse",
|
| 40 |
+
"observer_kwargs": {},
|
| 41 |
+
"scale_dtype": null,
|
| 42 |
+
"strategy": "group",
|
| 43 |
+
"symmetric": true,
|
| 44 |
+
"type": "int",
|
| 45 |
+
"zp_dtype": null
|
| 46 |
+
}
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"format": "pack-quantized",
|
| 50 |
+
"global_compression_ratio": null,
|
| 51 |
+
"ignore": [
|
| 52 |
+
"lm_head"
|
| 53 |
+
],
|
| 54 |
+
"kv_cache_scheme": null,
|
| 55 |
+
"quant_method": "compressed-tensors",
|
| 56 |
+
"quantization_status": "compressed",
|
| 57 |
+
"sparsity_config": {},
|
| 58 |
+
"transform_config": {},
|
| 59 |
+
"version": "0.13.1.a20260212"
|
| 60 |
+
},
|
| 61 |
+
"resid_pdrop": 0.0,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": null,
|
| 64 |
+
"rope_theta": 70000000,
|
| 65 |
+
"tie_word_embeddings": false,
|
| 66 |
+
"transformers_version": "4.57.3",
|
| 67 |
+
"use_cache": true,
|
| 68 |
+
"vocab_size": 166144
|
| 69 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 166100,
|
| 4 |
+
"eos_token_id": 166101,
|
| 5 |
+
"pad_token_id": 0,
|
| 6 |
+
"transformers_version": "4.51.3"
|
| 7 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:562e1208eaa7e8050778b1a18978917e6728735e6d6b55ccb97b7caeff96f484
|
| 3 |
+
size 4977224448
|
recipe.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
default_stage:
|
| 2 |
+
default_modifiers:
|
| 3 |
+
AWQModifier:
|
| 4 |
+
config_groups:
|
| 5 |
+
group_0:
|
| 6 |
+
targets: [Linear]
|
| 7 |
+
weights:
|
| 8 |
+
num_bits: 8
|
| 9 |
+
type: int
|
| 10 |
+
symmetric: true
|
| 11 |
+
group_size: 32
|
| 12 |
+
strategy: group
|
| 13 |
+
block_structure: null
|
| 14 |
+
dynamic: false
|
| 15 |
+
actorder: null
|
| 16 |
+
scale_dtype: null
|
| 17 |
+
zp_dtype: null
|
| 18 |
+
observer: mse
|
| 19 |
+
observer_kwargs: {}
|
| 20 |
+
input_activations: null
|
| 21 |
+
output_activations: null
|
| 22 |
+
format: null
|
| 23 |
+
targets: [Linear]
|
| 24 |
+
ignore: [model.embed_tokens, model.norm, lm_head]
|
| 25 |
+
mappings:
|
| 26 |
+
- smooth_layer: re:.*input_layernorm$
|
| 27 |
+
balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
|
| 28 |
+
activation_hook_target: null
|
| 29 |
+
- smooth_layer: re:.*v_proj$
|
| 30 |
+
balance_layers: ['re:.*o_proj$']
|
| 31 |
+
activation_hook_target: null
|
| 32 |
+
- smooth_layer: re:.*post_attention_layernorm$
|
| 33 |
+
balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
|
| 34 |
+
activation_hook_target: null
|
| 35 |
+
- smooth_layer: re:.*up_proj$
|
| 36 |
+
balance_layers: ['re:.*down_proj$']
|
| 37 |
+
activation_hook_target: null
|
| 38 |
+
duo_scaling: true
|
| 39 |
+
n_grid: 20
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|endoftext|>"
|
| 4 |
+
],
|
| 5 |
+
"bos_token": {
|
| 6 |
+
"content": "<|im_start|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"eos_token": {
|
| 13 |
+
"content": "<|im_end|>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"pad_token": {
|
| 20 |
+
"content": "<unk>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": true,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"unk_token": {
|
| 27 |
+
"content": "<unk>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": true,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
}
|
| 33 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d8f0326910136aca20831249220b38ce5299527647bc8c6b65404485c479740
|
| 3 |
+
size 18451122
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb41d04798b714520a9b075727b0226538b7330254299062742c50ec8374bc36
|
| 3 |
+
size 2782298
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": true,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": true,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": true,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": true,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
},
|
| 30 |
+
"166100": {
|
| 31 |
+
"content": "<|im_start|>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false,
|
| 36 |
+
"special": true
|
| 37 |
+
},
|
| 38 |
+
"166101": {
|
| 39 |
+
"content": "<|im_end|>",
|
| 40 |
+
"lstrip": false,
|
| 41 |
+
"normalized": false,
|
| 42 |
+
"rstrip": false,
|
| 43 |
+
"single_word": false,
|
| 44 |
+
"special": true
|
| 45 |
+
},
|
| 46 |
+
"166102": {
|
| 47 |
+
"content": "<|endoftext|>",
|
| 48 |
+
"lstrip": false,
|
| 49 |
+
"normalized": false,
|
| 50 |
+
"rstrip": false,
|
| 51 |
+
"single_word": false,
|
| 52 |
+
"special": true
|
| 53 |
+
},
|
| 54 |
+
"166103": {
|
| 55 |
+
"content": "<think>",
|
| 56 |
+
"lstrip": false,
|
| 57 |
+
"normalized": true,
|
| 58 |
+
"rstrip": false,
|
| 59 |
+
"single_word": false,
|
| 60 |
+
"special": false
|
| 61 |
+
},
|
| 62 |
+
"166104": {
|
| 63 |
+
"content": "</think>",
|
| 64 |
+
"lstrip": false,
|
| 65 |
+
"normalized": true,
|
| 66 |
+
"rstrip": false,
|
| 67 |
+
"single_word": false,
|
| 68 |
+
"special": false
|
| 69 |
+
},
|
| 70 |
+
"166105": {
|
| 71 |
+
"content": "<tool_call>",
|
| 72 |
+
"lstrip": false,
|
| 73 |
+
"normalized": true,
|
| 74 |
+
"rstrip": false,
|
| 75 |
+
"single_word": false,
|
| 76 |
+
"special": false
|
| 77 |
+
},
|
| 78 |
+
"166106": {
|
| 79 |
+
"content": "</tool_call>",
|
| 80 |
+
"lstrip": false,
|
| 81 |
+
"normalized": true,
|
| 82 |
+
"rstrip": false,
|
| 83 |
+
"single_word": false,
|
| 84 |
+
"special": false
|
| 85 |
+
}
|
| 86 |
+
},
|
| 87 |
+
"additional_special_tokens": [
|
| 88 |
+
"<|endoftext|>"
|
| 89 |
+
],
|
| 90 |
+
"bos_token": "<|im_start|>",
|
| 91 |
+
"chat_template": "\n {%- if tools %}\n {{- '<|im_start|>system\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\n\n' }}\n {%- else %} \n {{- '你是一位工具函数调用专家,你会得到一个问题和一组可能的工具函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的,请尽量尝试探索通过工具解决问题。\n如果没有一个函数可以使用,请直接使用自然语言回复用户。\n如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息。\n如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户。' }} \n {%- endif %}\n {{- \"# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\n</tool_call><|im_end|>\n\" }}\n {%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}\n {%- else %} \n {{- '<|im_start|>system\n你是南北阁,一款由BOSS直聘自主研发并训练的专业大语言模型。<|im_end|>\n' }} \n {%- endif %}\n {%- endif %}\n {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n {%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n {%- endfor %}\n {%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}\n {%- elif message.role == 
\"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index or keep_all_think or (extra_body is defined and extra_body.keep_all_think) %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\n<tool_response>\n' }}\n {{- content }}\n {{- '\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\n' }}\n {%- endif %}\n {%- endif %}\n {%- endfor %}\n {%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\n' }}\n {%- endif %}\n",
|
| 92 |
+
"clean_up_tokenization_spaces": false,
|
| 93 |
+
"eos_token": "<|im_end|>",
|
| 94 |
+
"extra_special_tokens": {},
|
| 95 |
+
"legacy": true,
|
| 96 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 97 |
+
"pad_token": "<unk>",
|
| 98 |
+
"sp_model_kwargs": {},
|
| 99 |
+
"spaces_between_special_tokens": false,
|
| 100 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 101 |
+
"unk_token": "<unk>",
|
| 102 |
+
"use_default_system_prompt": false
|
| 103 |
+
}
|
tokenizer_config_search.json
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": true,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": true,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": true,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": true,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
},
|
| 30 |
+
"166100": {
|
| 31 |
+
"content": "<|im_start|>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false,
|
| 36 |
+
"special": true
|
| 37 |
+
},
|
| 38 |
+
"166101": {
|
| 39 |
+
"content": "<|im_end|>",
|
| 40 |
+
"lstrip": false,
|
| 41 |
+
"normalized": false,
|
| 42 |
+
"rstrip": false,
|
| 43 |
+
"single_word": false,
|
| 44 |
+
"special": true
|
| 45 |
+
},
|
| 46 |
+
"166102": {
|
| 47 |
+
"content": "<|endoftext|>",
|
| 48 |
+
"lstrip": false,
|
| 49 |
+
"normalized": false,
|
| 50 |
+
"rstrip": false,
|
| 51 |
+
"single_word": false,
|
| 52 |
+
"special": true
|
| 53 |
+
},
|
| 54 |
+
"166103": {
|
| 55 |
+
"content": "<think>",
|
| 56 |
+
"lstrip": false,
|
| 57 |
+
"normalized": true,
|
| 58 |
+
"rstrip": false,
|
| 59 |
+
"single_word": false,
|
| 60 |
+
"special": false
|
| 61 |
+
},
|
| 62 |
+
"166104": {
|
| 63 |
+
"content": "</think>",
|
| 64 |
+
"lstrip": false,
|
| 65 |
+
"normalized": true,
|
| 66 |
+
"rstrip": false,
|
| 67 |
+
"single_word": false,
|
| 68 |
+
"special": false
|
| 69 |
+
},
|
| 70 |
+
"166105": {
|
| 71 |
+
"content": "<tool_call>",
|
| 72 |
+
"lstrip": false,
|
| 73 |
+
"normalized": true,
|
| 74 |
+
"rstrip": false,
|
| 75 |
+
"single_word": false,
|
| 76 |
+
"special": false
|
| 77 |
+
},
|
| 78 |
+
"166106": {
|
| 79 |
+
"content": "</tool_call>",
|
| 80 |
+
"lstrip": false,
|
| 81 |
+
"normalized": true,
|
| 82 |
+
"rstrip": false,
|
| 83 |
+
"single_word": false,
|
| 84 |
+
"special": false
|
| 85 |
+
}
|
| 86 |
+
},
|
| 87 |
+
"additional_special_tokens": [],
|
| 88 |
+
"bos_token": "<|im_start|>",
|
| 89 |
+
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n你是南北阁,一款由BOSS直聘自主研发并训练的专业大语言模型。<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if (add_generation_prompt is defined and add_generation_prompt) %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
| 90 |
+
"clean_up_tokenization_spaces": false,
|
| 91 |
+
"eos_token": "<|im_end|>",
|
| 92 |
+
"extra_special_tokens": {},
|
| 93 |
+
"legacy": true,
|
| 94 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 95 |
+
"pad_token": "<unk>",
|
| 96 |
+
"sp_model_kwargs": {},
|
| 97 |
+
"spaces_between_special_tokens": false,
|
| 98 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 99 |
+
"unk_token": "<unk>",
|
| 100 |
+
"use_default_system_prompt": false
|
| 101 |
+
}
|