INC4AI committed on
Commit
b34b5de
·
verified ·
1 Parent(s): 6e78480

Upload model

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. README.md +0 -0
  3. chat_template.jinja +154 -0
  4. config.json +1790 -0
  5. generation_config.json +13 -0
  6. model-00001-of-00042.safetensors +3 -0
  7. model-00002-of-00042.safetensors +3 -0
  8. model-00003-of-00042.safetensors +3 -0
  9. model-00004-of-00042.safetensors +3 -0
  10. model-00005-of-00042.safetensors +3 -0
  11. model-00006-of-00042.safetensors +3 -0
  12. model-00007-of-00042.safetensors +3 -0
  13. model-00008-of-00042.safetensors +3 -0
  14. model-00009-of-00042.safetensors +3 -0
  15. model-00010-of-00042.safetensors +3 -0
  16. model-00011-of-00042.safetensors +3 -0
  17. model-00012-of-00042.safetensors +3 -0
  18. model-00013-of-00042.safetensors +3 -0
  19. model-00014-of-00042.safetensors +3 -0
  20. model-00015-of-00042.safetensors +3 -0
  21. model-00016-of-00042.safetensors +3 -0
  22. model-00017-of-00042.safetensors +3 -0
  23. model-00018-of-00042.safetensors +3 -0
  24. model-00019-of-00042.safetensors +3 -0
  25. model-00020-of-00042.safetensors +3 -0
  26. model-00021-of-00042.safetensors +3 -0
  27. model-00022-of-00042.safetensors +3 -0
  28. model-00023-of-00042.safetensors +3 -0
  29. model-00024-of-00042.safetensors +3 -0
  30. model-00025-of-00042.safetensors +3 -0
  31. model-00026-of-00042.safetensors +3 -0
  32. model-00027-of-00042.safetensors +3 -0
  33. model-00028-of-00042.safetensors +3 -0
  34. model-00029-of-00042.safetensors +3 -0
  35. model-00030-of-00042.safetensors +3 -0
  36. model-00031-of-00042.safetensors +3 -0
  37. model-00032-of-00042.safetensors +3 -0
  38. model-00033-of-00042.safetensors +3 -0
  39. model-00034-of-00042.safetensors +3 -0
  40. model-00035-of-00042.safetensors +3 -0
  41. model-00036-of-00042.safetensors +3 -0
  42. model-00037-of-00042.safetensors +3 -0
  43. model-00038-of-00042.safetensors +3 -0
  44. model-00039-of-00042.safetensors +3 -0
  45. model-00040-of-00042.safetensors +3 -0
  46. model-00041-of-00042.safetensors +3 -0
  47. model-00042-of-00042.safetensors +3 -0
  48. model.safetensors.index.json +3 -0
  49. processor_config.json +63 -0
  50. quantization_config.json +1648 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
File without changes
chat_template.jinja ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- elif content is iterable and content is not mapping %}
7
+ {%- for item in content %}
8
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if is_system_content %}
10
+ {{- raise_exception('System message cannot contain images.') }}
11
+ {%- endif %}
12
+ {%- if do_vision_count %}
13
+ {%- set image_count.value = image_count.value + 1 %}
14
+ {%- endif %}
15
+ {%- if add_vision_id %}
16
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
17
+ {%- endif %}
18
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
19
+ {%- elif 'video' in item or item.type == 'video' %}
20
+ {%- if is_system_content %}
21
+ {{- raise_exception('System message cannot contain videos.') }}
22
+ {%- endif %}
23
+ {%- if do_vision_count %}
24
+ {%- set video_count.value = video_count.value + 1 %}
25
+ {%- endif %}
26
+ {%- if add_vision_id %}
27
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
28
+ {%- endif %}
29
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
30
+ {%- elif 'text' in item %}
31
+ {{- item.text }}
32
+ {%- else %}
33
+ {{- raise_exception('Unexpected item type in content.') }}
34
+ {%- endif %}
35
+ {%- endfor %}
36
+ {%- elif content is none or content is undefined %}
37
+ {{- '' }}
38
+ {%- else %}
39
+ {{- raise_exception('Unexpected content type.') }}
40
+ {%- endif %}
41
+ {%- endmacro %}
42
+ {%- if not messages %}
43
+ {{- raise_exception('No messages provided.') }}
44
+ {%- endif %}
45
+ {%- if tools and tools is iterable and tools is not mapping %}
46
+ {{- '<|im_start|>system\n' }}
47
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
48
+ {%- for tool in tools %}
49
+ {{- "\n" }}
50
+ {{- tool | tojson }}
51
+ {%- endfor %}
52
+ {{- "\n</tools>" }}
53
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
54
+ {%- if messages[0].role == 'system' %}
55
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
56
+ {%- if content %}
57
+ {{- '\n\n' + content }}
58
+ {%- endif %}
59
+ {%- endif %}
60
+ {{- '<|im_end|>\n' }}
61
+ {%- else %}
62
+ {%- if messages[0].role == 'system' %}
63
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
64
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
68
+ {%- for message in messages[::-1] %}
69
+ {%- set index = (messages|length - 1) - loop.index0 %}
70
+ {%- if ns.multi_step_tool and message.role == "user" %}
71
+ {%- set content = render_content(message.content, false)|trim %}
72
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
73
+ {%- set ns.multi_step_tool = false %}
74
+ {%- set ns.last_query_index = index %}
75
+ {%- endif %}
76
+ {%- endif %}
77
+ {%- endfor %}
78
+ {%- if ns.multi_step_tool %}
79
+ {{- raise_exception('No user query found in messages.') }}
80
+ {%- endif %}
81
+ {%- for message in messages %}
82
+ {%- set content = render_content(message.content, true)|trim %}
83
+ {%- if message.role == "system" %}
84
+ {%- if not loop.first %}
85
+ {{- raise_exception('System message must be at the beginning.') }}
86
+ {%- endif %}
87
+ {%- elif message.role == "user" %}
88
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
89
+ {%- elif message.role == "assistant" %}
90
+ {%- set reasoning_content = '' %}
91
+ {%- if message.reasoning_content is string %}
92
+ {%- set reasoning_content = message.reasoning_content %}
93
+ {%- else %}
94
+ {%- if '</think>' in content %}
95
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
96
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
97
+ {%- endif %}
98
+ {%- endif %}
99
+ {%- set reasoning_content = reasoning_content|trim %}
100
+ {%- if loop.index0 > ns.last_query_index %}
101
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
102
+ {%- else %}
103
+ {{- '<|im_start|>' + message.role + '\n' + content }}
104
+ {%- endif %}
105
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
106
+ {%- for tool_call in message.tool_calls %}
107
+ {%- if tool_call.function is defined %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {%- if loop.first %}
111
+ {%- if content|trim %}
112
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
113
+ {%- else %}
114
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
115
+ {%- endif %}
116
+ {%- else %}
117
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
118
+ {%- endif %}
119
+ {%- if tool_call.arguments is defined %}
120
+ {%- for args_name, args_value in tool_call.arguments|items %}
121
+ {{- '<parameter=' + args_name + '>\n' }}
122
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
123
+ {{- args_value }}
124
+ {{- '\n</parameter>\n' }}
125
+ {%- endfor %}
126
+ {%- endif %}
127
+ {{- '</function>\n</tool_call>' }}
128
+ {%- endfor %}
129
+ {%- endif %}
130
+ {{- '<|im_end|>\n' }}
131
+ {%- elif message.role == "tool" %}
132
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
133
+ {{- '<|im_start|>user' }}
134
+ {%- endif %}
135
+ {{- '\n<tool_response>\n' }}
136
+ {{- content }}
137
+ {{- '\n</tool_response>' }}
138
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
139
+ {{- '<|im_end|>\n' }}
140
+ {%- elif loop.last %}
141
+ {{- '<|im_end|>\n' }}
142
+ {%- endif %}
143
+ {%- else %}
144
+ {{- raise_exception('Unexpected message role.') }}
145
+ {%- endif %}
146
+ {%- endfor %}
147
+ {%- if add_generation_prompt %}
148
+ {{- '<|im_start|>assistant\n' }}
149
+ {%- if enable_thinking is defined and enable_thinking is false %}
150
+ {{- '<think>\n\n</think>\n\n' }}
151
+ {%- else %}
152
+ {{- '<think>\n' }}
153
+ {%- endif %}
154
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,1790 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3_5MoeForConditionalGeneration"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "image_token_id": 248056,
7
+ "model_type": "qwen3_5_moe",
8
+ "quantization_config": {
9
+ "autoround_version": "0.12.0",
10
+ "bits": 4,
11
+ "block_name_to_quantize": "model.language_model.layers",
12
+ "data_type": "int",
13
+ "extra_config": {
14
+ "model.language_model.layers.0.linear_attn.in_proj_a": {
15
+ "bits": 16
16
+ },
17
+ "model.language_model.layers.0.linear_attn.in_proj_b": {
18
+ "bits": 16
19
+ },
20
+ "model.language_model.layers.0.linear_attn.in_proj_qkv": {
21
+ "bits": 16
22
+ },
23
+ "model.language_model.layers.0.linear_attn.in_proj_z": {
24
+ "bits": 16
25
+ },
26
+ "model.language_model.layers.0.linear_attn.out_proj": {
27
+ "bits": 16
28
+ },
29
+ "model.language_model.layers.0.mlp.shared_expert.down_proj": {
30
+ "bits": 16
31
+ },
32
+ "model.language_model.layers.0.mlp.shared_expert.gate_proj": {
33
+ "bits": 16
34
+ },
35
+ "model.language_model.layers.0.mlp.shared_expert.up_proj": {
36
+ "bits": 16
37
+ },
38
+ "model.language_model.layers.0.mlp.shared_expert_gate": {
39
+ "bits": 16,
40
+ "data_type": "fp"
41
+ },
42
+ "model.language_model.layers.1.linear_attn.in_proj_a": {
43
+ "bits": 16
44
+ },
45
+ "model.language_model.layers.1.linear_attn.in_proj_b": {
46
+ "bits": 16
47
+ },
48
+ "model.language_model.layers.1.linear_attn.in_proj_qkv": {
49
+ "bits": 16
50
+ },
51
+ "model.language_model.layers.1.linear_attn.in_proj_z": {
52
+ "bits": 16
53
+ },
54
+ "model.language_model.layers.1.linear_attn.out_proj": {
55
+ "bits": 16
56
+ },
57
+ "model.language_model.layers.1.mlp.shared_expert.down_proj": {
58
+ "bits": 16
59
+ },
60
+ "model.language_model.layers.1.mlp.shared_expert.gate_proj": {
61
+ "bits": 16
62
+ },
63
+ "model.language_model.layers.1.mlp.shared_expert.up_proj": {
64
+ "bits": 16
65
+ },
66
+ "model.language_model.layers.1.mlp.shared_expert_gate": {
67
+ "bits": 16,
68
+ "data_type": "fp"
69
+ },
70
+ "model.language_model.layers.10.linear_attn.in_proj_a": {
71
+ "bits": 16
72
+ },
73
+ "model.language_model.layers.10.linear_attn.in_proj_b": {
74
+ "bits": 16
75
+ },
76
+ "model.language_model.layers.10.linear_attn.in_proj_qkv": {
77
+ "bits": 16
78
+ },
79
+ "model.language_model.layers.10.linear_attn.in_proj_z": {
80
+ "bits": 16
81
+ },
82
+ "model.language_model.layers.10.linear_attn.out_proj": {
83
+ "bits": 16
84
+ },
85
+ "model.language_model.layers.10.mlp.shared_expert.down_proj": {
86
+ "bits": 16
87
+ },
88
+ "model.language_model.layers.10.mlp.shared_expert.gate_proj": {
89
+ "bits": 16
90
+ },
91
+ "model.language_model.layers.10.mlp.shared_expert.up_proj": {
92
+ "bits": 16
93
+ },
94
+ "model.language_model.layers.10.mlp.shared_expert_gate": {
95
+ "bits": 16,
96
+ "data_type": "fp"
97
+ },
98
+ "model.language_model.layers.11.mlp.shared_expert.down_proj": {
99
+ "bits": 16
100
+ },
101
+ "model.language_model.layers.11.mlp.shared_expert.gate_proj": {
102
+ "bits": 16
103
+ },
104
+ "model.language_model.layers.11.mlp.shared_expert.up_proj": {
105
+ "bits": 16
106
+ },
107
+ "model.language_model.layers.11.mlp.shared_expert_gate": {
108
+ "bits": 16,
109
+ "data_type": "fp"
110
+ },
111
+ "model.language_model.layers.11.self_attn.k_proj": {
112
+ "bits": 16
113
+ },
114
+ "model.language_model.layers.11.self_attn.o_proj": {
115
+ "bits": 16
116
+ },
117
+ "model.language_model.layers.11.self_attn.q_proj": {
118
+ "bits": 16
119
+ },
120
+ "model.language_model.layers.11.self_attn.v_proj": {
121
+ "bits": 16
122
+ },
123
+ "model.language_model.layers.12.linear_attn.in_proj_a": {
124
+ "bits": 16
125
+ },
126
+ "model.language_model.layers.12.linear_attn.in_proj_b": {
127
+ "bits": 16
128
+ },
129
+ "model.language_model.layers.12.linear_attn.in_proj_qkv": {
130
+ "bits": 16
131
+ },
132
+ "model.language_model.layers.12.linear_attn.in_proj_z": {
133
+ "bits": 16
134
+ },
135
+ "model.language_model.layers.12.linear_attn.out_proj": {
136
+ "bits": 16
137
+ },
138
+ "model.language_model.layers.12.mlp.shared_expert.down_proj": {
139
+ "bits": 16
140
+ },
141
+ "model.language_model.layers.12.mlp.shared_expert.gate_proj": {
142
+ "bits": 16
143
+ },
144
+ "model.language_model.layers.12.mlp.shared_expert.up_proj": {
145
+ "bits": 16
146
+ },
147
+ "model.language_model.layers.12.mlp.shared_expert_gate": {
148
+ "bits": 16,
149
+ "data_type": "fp"
150
+ },
151
+ "model.language_model.layers.13.linear_attn.in_proj_a": {
152
+ "bits": 16
153
+ },
154
+ "model.language_model.layers.13.linear_attn.in_proj_b": {
155
+ "bits": 16
156
+ },
157
+ "model.language_model.layers.13.linear_attn.in_proj_qkv": {
158
+ "bits": 16
159
+ },
160
+ "model.language_model.layers.13.linear_attn.in_proj_z": {
161
+ "bits": 16
162
+ },
163
+ "model.language_model.layers.13.linear_attn.out_proj": {
164
+ "bits": 16
165
+ },
166
+ "model.language_model.layers.13.mlp.shared_expert.down_proj": {
167
+ "bits": 16
168
+ },
169
+ "model.language_model.layers.13.mlp.shared_expert.gate_proj": {
170
+ "bits": 16
171
+ },
172
+ "model.language_model.layers.13.mlp.shared_expert.up_proj": {
173
+ "bits": 16
174
+ },
175
+ "model.language_model.layers.13.mlp.shared_expert_gate": {
176
+ "bits": 16,
177
+ "data_type": "fp"
178
+ },
179
+ "model.language_model.layers.14.linear_attn.in_proj_a": {
180
+ "bits": 16
181
+ },
182
+ "model.language_model.layers.14.linear_attn.in_proj_b": {
183
+ "bits": 16
184
+ },
185
+ "model.language_model.layers.14.linear_attn.in_proj_qkv": {
186
+ "bits": 16
187
+ },
188
+ "model.language_model.layers.14.linear_attn.in_proj_z": {
189
+ "bits": 16
190
+ },
191
+ "model.language_model.layers.14.linear_attn.out_proj": {
192
+ "bits": 16
193
+ },
194
+ "model.language_model.layers.14.mlp.shared_expert.down_proj": {
195
+ "bits": 16
196
+ },
197
+ "model.language_model.layers.14.mlp.shared_expert.gate_proj": {
198
+ "bits": 16
199
+ },
200
+ "model.language_model.layers.14.mlp.shared_expert.up_proj": {
201
+ "bits": 16
202
+ },
203
+ "model.language_model.layers.14.mlp.shared_expert_gate": {
204
+ "bits": 16,
205
+ "data_type": "fp"
206
+ },
207
+ "model.language_model.layers.15.mlp.shared_expert.down_proj": {
208
+ "bits": 16
209
+ },
210
+ "model.language_model.layers.15.mlp.shared_expert.gate_proj": {
211
+ "bits": 16
212
+ },
213
+ "model.language_model.layers.15.mlp.shared_expert.up_proj": {
214
+ "bits": 16
215
+ },
216
+ "model.language_model.layers.15.mlp.shared_expert_gate": {
217
+ "bits": 16,
218
+ "data_type": "fp"
219
+ },
220
+ "model.language_model.layers.15.self_attn.k_proj": {
221
+ "bits": 16
222
+ },
223
+ "model.language_model.layers.15.self_attn.o_proj": {
224
+ "bits": 16
225
+ },
226
+ "model.language_model.layers.15.self_attn.q_proj": {
227
+ "bits": 16
228
+ },
229
+ "model.language_model.layers.15.self_attn.v_proj": {
230
+ "bits": 16
231
+ },
232
+ "model.language_model.layers.16.linear_attn.in_proj_a": {
233
+ "bits": 16
234
+ },
235
+ "model.language_model.layers.16.linear_attn.in_proj_b": {
236
+ "bits": 16
237
+ },
238
+ "model.language_model.layers.16.linear_attn.in_proj_qkv": {
239
+ "bits": 16
240
+ },
241
+ "model.language_model.layers.16.linear_attn.in_proj_z": {
242
+ "bits": 16
243
+ },
244
+ "model.language_model.layers.16.linear_attn.out_proj": {
245
+ "bits": 16
246
+ },
247
+ "model.language_model.layers.16.mlp.shared_expert.down_proj": {
248
+ "bits": 16
249
+ },
250
+ "model.language_model.layers.16.mlp.shared_expert.gate_proj": {
251
+ "bits": 16
252
+ },
253
+ "model.language_model.layers.16.mlp.shared_expert.up_proj": {
254
+ "bits": 16
255
+ },
256
+ "model.language_model.layers.16.mlp.shared_expert_gate": {
257
+ "bits": 16,
258
+ "data_type": "fp"
259
+ },
260
+ "model.language_model.layers.17.linear_attn.in_proj_a": {
261
+ "bits": 16
262
+ },
263
+ "model.language_model.layers.17.linear_attn.in_proj_b": {
264
+ "bits": 16
265
+ },
266
+ "model.language_model.layers.17.linear_attn.in_proj_qkv": {
267
+ "bits": 16
268
+ },
269
+ "model.language_model.layers.17.linear_attn.in_proj_z": {
270
+ "bits": 16
271
+ },
272
+ "model.language_model.layers.17.linear_attn.out_proj": {
273
+ "bits": 16
274
+ },
275
+ "model.language_model.layers.17.mlp.shared_expert.down_proj": {
276
+ "bits": 16
277
+ },
278
+ "model.language_model.layers.17.mlp.shared_expert.gate_proj": {
279
+ "bits": 16
280
+ },
281
+ "model.language_model.layers.17.mlp.shared_expert.up_proj": {
282
+ "bits": 16
283
+ },
284
+ "model.language_model.layers.17.mlp.shared_expert_gate": {
285
+ "bits": 16,
286
+ "data_type": "fp"
287
+ },
288
+ "model.language_model.layers.18.linear_attn.in_proj_a": {
289
+ "bits": 16
290
+ },
291
+ "model.language_model.layers.18.linear_attn.in_proj_b": {
292
+ "bits": 16
293
+ },
294
+ "model.language_model.layers.18.linear_attn.in_proj_qkv": {
295
+ "bits": 16
296
+ },
297
+ "model.language_model.layers.18.linear_attn.in_proj_z": {
298
+ "bits": 16
299
+ },
300
+ "model.language_model.layers.18.linear_attn.out_proj": {
301
+ "bits": 16
302
+ },
303
+ "model.language_model.layers.18.mlp.shared_expert.down_proj": {
304
+ "bits": 16
305
+ },
306
+ "model.language_model.layers.18.mlp.shared_expert.gate_proj": {
307
+ "bits": 16
308
+ },
309
+ "model.language_model.layers.18.mlp.shared_expert.up_proj": {
310
+ "bits": 16
311
+ },
312
+ "model.language_model.layers.18.mlp.shared_expert_gate": {
313
+ "bits": 16,
314
+ "data_type": "fp"
315
+ },
316
+ "model.language_model.layers.19.mlp.shared_expert.down_proj": {
317
+ "bits": 16
318
+ },
319
+ "model.language_model.layers.19.mlp.shared_expert.gate_proj": {
320
+ "bits": 16
321
+ },
322
+ "model.language_model.layers.19.mlp.shared_expert.up_proj": {
323
+ "bits": 16
324
+ },
325
+ "model.language_model.layers.19.mlp.shared_expert_gate": {
326
+ "bits": 16,
327
+ "data_type": "fp"
328
+ },
329
+ "model.language_model.layers.19.self_attn.k_proj": {
330
+ "bits": 16
331
+ },
332
+ "model.language_model.layers.19.self_attn.o_proj": {
333
+ "bits": 16
334
+ },
335
+ "model.language_model.layers.19.self_attn.q_proj": {
336
+ "bits": 16
337
+ },
338
+ "model.language_model.layers.19.self_attn.v_proj": {
339
+ "bits": 16
340
+ },
341
+ "model.language_model.layers.2.linear_attn.in_proj_a": {
342
+ "bits": 16
343
+ },
344
+ "model.language_model.layers.2.linear_attn.in_proj_b": {
345
+ "bits": 16
346
+ },
347
+ "model.language_model.layers.2.linear_attn.in_proj_qkv": {
348
+ "bits": 16
349
+ },
350
+ "model.language_model.layers.2.linear_attn.in_proj_z": {
351
+ "bits": 16
352
+ },
353
+ "model.language_model.layers.2.linear_attn.out_proj": {
354
+ "bits": 16
355
+ },
356
+ "model.language_model.layers.2.mlp.shared_expert.down_proj": {
357
+ "bits": 16
358
+ },
359
+ "model.language_model.layers.2.mlp.shared_expert.gate_proj": {
360
+ "bits": 16
361
+ },
362
+ "model.language_model.layers.2.mlp.shared_expert.up_proj": {
363
+ "bits": 16
364
+ },
365
+ "model.language_model.layers.2.mlp.shared_expert_gate": {
366
+ "bits": 16,
367
+ "data_type": "fp"
368
+ },
369
+ "model.language_model.layers.20.linear_attn.in_proj_a": {
370
+ "bits": 16
371
+ },
372
+ "model.language_model.layers.20.linear_attn.in_proj_b": {
373
+ "bits": 16
374
+ },
375
+ "model.language_model.layers.20.linear_attn.in_proj_qkv": {
376
+ "bits": 16
377
+ },
378
+ "model.language_model.layers.20.linear_attn.in_proj_z": {
379
+ "bits": 16
380
+ },
381
+ "model.language_model.layers.20.linear_attn.out_proj": {
382
+ "bits": 16
383
+ },
384
+ "model.language_model.layers.20.mlp.shared_expert.down_proj": {
385
+ "bits": 16
386
+ },
387
+ "model.language_model.layers.20.mlp.shared_expert.gate_proj": {
388
+ "bits": 16
389
+ },
390
+ "model.language_model.layers.20.mlp.shared_expert.up_proj": {
391
+ "bits": 16
392
+ },
393
+ "model.language_model.layers.20.mlp.shared_expert_gate": {
394
+ "bits": 16,
395
+ "data_type": "fp"
396
+ },
397
+ "model.language_model.layers.21.linear_attn.in_proj_a": {
398
+ "bits": 16
399
+ },
400
+ "model.language_model.layers.21.linear_attn.in_proj_b": {
401
+ "bits": 16
402
+ },
403
+ "model.language_model.layers.21.linear_attn.in_proj_qkv": {
404
+ "bits": 16
405
+ },
406
+ "model.language_model.layers.21.linear_attn.in_proj_z": {
407
+ "bits": 16
408
+ },
409
+ "model.language_model.layers.21.linear_attn.out_proj": {
410
+ "bits": 16
411
+ },
412
+ "model.language_model.layers.21.mlp.shared_expert.down_proj": {
413
+ "bits": 16
414
+ },
415
+ "model.language_model.layers.21.mlp.shared_expert.gate_proj": {
416
+ "bits": 16
417
+ },
418
+ "model.language_model.layers.21.mlp.shared_expert.up_proj": {
419
+ "bits": 16
420
+ },
421
+ "model.language_model.layers.21.mlp.shared_expert_gate": {
422
+ "bits": 16,
423
+ "data_type": "fp"
424
+ },
425
+ "model.language_model.layers.22.linear_attn.in_proj_a": {
426
+ "bits": 16
427
+ },
428
+ "model.language_model.layers.22.linear_attn.in_proj_b": {
429
+ "bits": 16
430
+ },
431
+ "model.language_model.layers.22.linear_attn.in_proj_qkv": {
432
+ "bits": 16
433
+ },
434
+ "model.language_model.layers.22.linear_attn.in_proj_z": {
435
+ "bits": 16
436
+ },
437
+ "model.language_model.layers.22.linear_attn.out_proj": {
438
+ "bits": 16
439
+ },
440
+ "model.language_model.layers.22.mlp.shared_expert.down_proj": {
441
+ "bits": 16
442
+ },
443
+ "model.language_model.layers.22.mlp.shared_expert.gate_proj": {
444
+ "bits": 16
445
+ },
446
+ "model.language_model.layers.22.mlp.shared_expert.up_proj": {
447
+ "bits": 16
448
+ },
449
+ "model.language_model.layers.22.mlp.shared_expert_gate": {
450
+ "bits": 16,
451
+ "data_type": "fp"
452
+ },
453
+ "model.language_model.layers.23.mlp.shared_expert.down_proj": {
454
+ "bits": 16
455
+ },
456
+ "model.language_model.layers.23.mlp.shared_expert.gate_proj": {
457
+ "bits": 16
458
+ },
459
+ "model.language_model.layers.23.mlp.shared_expert.up_proj": {
460
+ "bits": 16
461
+ },
462
+ "model.language_model.layers.23.mlp.shared_expert_gate": {
463
+ "bits": 16,
464
+ "data_type": "fp"
465
+ },
466
+ "model.language_model.layers.23.self_attn.k_proj": {
467
+ "bits": 16
468
+ },
469
+ "model.language_model.layers.23.self_attn.o_proj": {
470
+ "bits": 16
471
+ },
472
+ "model.language_model.layers.23.self_attn.q_proj": {
473
+ "bits": 16
474
+ },
475
+ "model.language_model.layers.23.self_attn.v_proj": {
476
+ "bits": 16
477
+ },
478
+ "model.language_model.layers.24.linear_attn.in_proj_a": {
479
+ "bits": 16
480
+ },
481
+ "model.language_model.layers.24.linear_attn.in_proj_b": {
482
+ "bits": 16
483
+ },
484
+ "model.language_model.layers.24.linear_attn.in_proj_qkv": {
485
+ "bits": 16
486
+ },
487
+ "model.language_model.layers.24.linear_attn.in_proj_z": {
488
+ "bits": 16
489
+ },
490
+ "model.language_model.layers.24.linear_attn.out_proj": {
491
+ "bits": 16
492
+ },
493
+ "model.language_model.layers.24.mlp.shared_expert.down_proj": {
494
+ "bits": 16
495
+ },
496
+ "model.language_model.layers.24.mlp.shared_expert.gate_proj": {
497
+ "bits": 16
498
+ },
499
+ "model.language_model.layers.24.mlp.shared_expert.up_proj": {
500
+ "bits": 16
501
+ },
502
+ "model.language_model.layers.24.mlp.shared_expert_gate": {
503
+ "bits": 16,
504
+ "data_type": "fp"
505
+ },
506
+ "model.language_model.layers.25.linear_attn.in_proj_a": {
507
+ "bits": 16
508
+ },
509
+ "model.language_model.layers.25.linear_attn.in_proj_b": {
510
+ "bits": 16
511
+ },
512
+ "model.language_model.layers.25.linear_attn.in_proj_qkv": {
513
+ "bits": 16
514
+ },
515
+ "model.language_model.layers.25.linear_attn.in_proj_z": {
516
+ "bits": 16
517
+ },
518
+ "model.language_model.layers.25.linear_attn.out_proj": {
519
+ "bits": 16
520
+ },
521
+ "model.language_model.layers.25.mlp.shared_expert.down_proj": {
522
+ "bits": 16
523
+ },
524
+ "model.language_model.layers.25.mlp.shared_expert.gate_proj": {
525
+ "bits": 16
526
+ },
527
+ "model.language_model.layers.25.mlp.shared_expert.up_proj": {
528
+ "bits": 16
529
+ },
530
+ "model.language_model.layers.25.mlp.shared_expert_gate": {
531
+ "bits": 16,
532
+ "data_type": "fp"
533
+ },
534
+ "model.language_model.layers.26.linear_attn.in_proj_a": {
535
+ "bits": 16
536
+ },
537
+ "model.language_model.layers.26.linear_attn.in_proj_b": {
538
+ "bits": 16
539
+ },
540
+ "model.language_model.layers.26.linear_attn.in_proj_qkv": {
541
+ "bits": 16
542
+ },
543
+ "model.language_model.layers.26.linear_attn.in_proj_z": {
544
+ "bits": 16
545
+ },
546
+ "model.language_model.layers.26.linear_attn.out_proj": {
547
+ "bits": 16
548
+ },
549
+ "model.language_model.layers.26.mlp.shared_expert.down_proj": {
550
+ "bits": 16
551
+ },
552
+ "model.language_model.layers.26.mlp.shared_expert.gate_proj": {
553
+ "bits": 16
554
+ },
555
+ "model.language_model.layers.26.mlp.shared_expert.up_proj": {
556
+ "bits": 16
557
+ },
558
+ "model.language_model.layers.26.mlp.shared_expert_gate": {
559
+ "bits": 16,
560
+ "data_type": "fp"
561
+ },
562
+ "model.language_model.layers.27.mlp.shared_expert.down_proj": {
563
+ "bits": 16
564
+ },
565
+ "model.language_model.layers.27.mlp.shared_expert.gate_proj": {
566
+ "bits": 16
567
+ },
568
+ "model.language_model.layers.27.mlp.shared_expert.up_proj": {
569
+ "bits": 16
570
+ },
571
+ "model.language_model.layers.27.mlp.shared_expert_gate": {
572
+ "bits": 16,
573
+ "data_type": "fp"
574
+ },
575
+ "model.language_model.layers.27.self_attn.k_proj": {
576
+ "bits": 16
577
+ },
578
+ "model.language_model.layers.27.self_attn.o_proj": {
579
+ "bits": 16
580
+ },
581
+ "model.language_model.layers.27.self_attn.q_proj": {
582
+ "bits": 16
583
+ },
584
+ "model.language_model.layers.27.self_attn.v_proj": {
585
+ "bits": 16
586
+ },
587
+ "model.language_model.layers.28.linear_attn.in_proj_a": {
588
+ "bits": 16
589
+ },
590
+ "model.language_model.layers.28.linear_attn.in_proj_b": {
591
+ "bits": 16
592
+ },
593
+ "model.language_model.layers.28.linear_attn.in_proj_qkv": {
594
+ "bits": 16
595
+ },
596
+ "model.language_model.layers.28.linear_attn.in_proj_z": {
597
+ "bits": 16
598
+ },
599
+ "model.language_model.layers.28.linear_attn.out_proj": {
600
+ "bits": 16
601
+ },
602
+ "model.language_model.layers.28.mlp.shared_expert.down_proj": {
603
+ "bits": 16
604
+ },
605
+ "model.language_model.layers.28.mlp.shared_expert.gate_proj": {
606
+ "bits": 16
607
+ },
608
+ "model.language_model.layers.28.mlp.shared_expert.up_proj": {
609
+ "bits": 16
610
+ },
611
+ "model.language_model.layers.28.mlp.shared_expert_gate": {
612
+ "bits": 16,
613
+ "data_type": "fp"
614
+ },
615
+ "model.language_model.layers.29.linear_attn.in_proj_a": {
616
+ "bits": 16
617
+ },
618
+ "model.language_model.layers.29.linear_attn.in_proj_b": {
619
+ "bits": 16
620
+ },
621
+ "model.language_model.layers.29.linear_attn.in_proj_qkv": {
622
+ "bits": 16
623
+ },
624
+ "model.language_model.layers.29.linear_attn.in_proj_z": {
625
+ "bits": 16
626
+ },
627
+ "model.language_model.layers.29.linear_attn.out_proj": {
628
+ "bits": 16
629
+ },
630
+ "model.language_model.layers.29.mlp.shared_expert.down_proj": {
631
+ "bits": 16
632
+ },
633
+ "model.language_model.layers.29.mlp.shared_expert.gate_proj": {
634
+ "bits": 16
635
+ },
636
+ "model.language_model.layers.29.mlp.shared_expert.up_proj": {
637
+ "bits": 16
638
+ },
639
+ "model.language_model.layers.29.mlp.shared_expert_gate": {
640
+ "bits": 16,
641
+ "data_type": "fp"
642
+ },
643
+ "model.language_model.layers.3.mlp.shared_expert.down_proj": {
644
+ "bits": 16
645
+ },
646
+ "model.language_model.layers.3.mlp.shared_expert.gate_proj": {
647
+ "bits": 16
648
+ },
649
+ "model.language_model.layers.3.mlp.shared_expert.up_proj": {
650
+ "bits": 16
651
+ },
652
+ "model.language_model.layers.3.mlp.shared_expert_gate": {
653
+ "bits": 16,
654
+ "data_type": "fp"
655
+ },
656
+ "model.language_model.layers.3.self_attn.k_proj": {
657
+ "bits": 16
658
+ },
659
+ "model.language_model.layers.3.self_attn.o_proj": {
660
+ "bits": 16
661
+ },
662
+ "model.language_model.layers.3.self_attn.q_proj": {
663
+ "bits": 16
664
+ },
665
+ "model.language_model.layers.3.self_attn.v_proj": {
666
+ "bits": 16
667
+ },
668
+ "model.language_model.layers.30.linear_attn.in_proj_a": {
669
+ "bits": 16
670
+ },
671
+ "model.language_model.layers.30.linear_attn.in_proj_b": {
672
+ "bits": 16
673
+ },
674
+ "model.language_model.layers.30.linear_attn.in_proj_qkv": {
675
+ "bits": 16
676
+ },
677
+ "model.language_model.layers.30.linear_attn.in_proj_z": {
678
+ "bits": 16
679
+ },
680
+ "model.language_model.layers.30.linear_attn.out_proj": {
681
+ "bits": 16
682
+ },
683
+ "model.language_model.layers.30.mlp.shared_expert.down_proj": {
684
+ "bits": 16
685
+ },
686
+ "model.language_model.layers.30.mlp.shared_expert.gate_proj": {
687
+ "bits": 16
688
+ },
689
+ "model.language_model.layers.30.mlp.shared_expert.up_proj": {
690
+ "bits": 16
691
+ },
692
+ "model.language_model.layers.30.mlp.shared_expert_gate": {
693
+ "bits": 16,
694
+ "data_type": "fp"
695
+ },
696
+ "model.language_model.layers.31.mlp.shared_expert.down_proj": {
697
+ "bits": 16
698
+ },
699
+ "model.language_model.layers.31.mlp.shared_expert.gate_proj": {
700
+ "bits": 16
701
+ },
702
+ "model.language_model.layers.31.mlp.shared_expert.up_proj": {
703
+ "bits": 16
704
+ },
705
+ "model.language_model.layers.31.mlp.shared_expert_gate": {
706
+ "bits": 16,
707
+ "data_type": "fp"
708
+ },
709
+ "model.language_model.layers.31.self_attn.k_proj": {
710
+ "bits": 16
711
+ },
712
+ "model.language_model.layers.31.self_attn.o_proj": {
713
+ "bits": 16
714
+ },
715
+ "model.language_model.layers.31.self_attn.q_proj": {
716
+ "bits": 16
717
+ },
718
+ "model.language_model.layers.31.self_attn.v_proj": {
719
+ "bits": 16
720
+ },
721
+ "model.language_model.layers.32.linear_attn.in_proj_a": {
722
+ "bits": 16
723
+ },
724
+ "model.language_model.layers.32.linear_attn.in_proj_b": {
725
+ "bits": 16
726
+ },
727
+ "model.language_model.layers.32.linear_attn.in_proj_qkv": {
728
+ "bits": 16
729
+ },
730
+ "model.language_model.layers.32.linear_attn.in_proj_z": {
731
+ "bits": 16
732
+ },
733
+ "model.language_model.layers.32.linear_attn.out_proj": {
734
+ "bits": 16
735
+ },
736
+ "model.language_model.layers.32.mlp.shared_expert.down_proj": {
737
+ "bits": 16
738
+ },
739
+ "model.language_model.layers.32.mlp.shared_expert.gate_proj": {
740
+ "bits": 16
741
+ },
742
+ "model.language_model.layers.32.mlp.shared_expert.up_proj": {
743
+ "bits": 16
744
+ },
745
+ "model.language_model.layers.32.mlp.shared_expert_gate": {
746
+ "bits": 16,
747
+ "data_type": "fp"
748
+ },
749
+ "model.language_model.layers.33.linear_attn.in_proj_a": {
750
+ "bits": 16
751
+ },
752
+ "model.language_model.layers.33.linear_attn.in_proj_b": {
753
+ "bits": 16
754
+ },
755
+ "model.language_model.layers.33.linear_attn.in_proj_qkv": {
756
+ "bits": 16
757
+ },
758
+ "model.language_model.layers.33.linear_attn.in_proj_z": {
759
+ "bits": 16
760
+ },
761
+ "model.language_model.layers.33.linear_attn.out_proj": {
762
+ "bits": 16
763
+ },
764
+ "model.language_model.layers.33.mlp.shared_expert.down_proj": {
765
+ "bits": 16
766
+ },
767
+ "model.language_model.layers.33.mlp.shared_expert.gate_proj": {
768
+ "bits": 16
769
+ },
770
+ "model.language_model.layers.33.mlp.shared_expert.up_proj": {
771
+ "bits": 16
772
+ },
773
+ "model.language_model.layers.33.mlp.shared_expert_gate": {
774
+ "bits": 16,
775
+ "data_type": "fp"
776
+ },
777
+ "model.language_model.layers.34.linear_attn.in_proj_a": {
778
+ "bits": 16
779
+ },
780
+ "model.language_model.layers.34.linear_attn.in_proj_b": {
781
+ "bits": 16
782
+ },
783
+ "model.language_model.layers.34.linear_attn.in_proj_qkv": {
784
+ "bits": 16
785
+ },
786
+ "model.language_model.layers.34.linear_attn.in_proj_z": {
787
+ "bits": 16
788
+ },
789
+ "model.language_model.layers.34.linear_attn.out_proj": {
790
+ "bits": 16
791
+ },
792
+ "model.language_model.layers.34.mlp.shared_expert.down_proj": {
793
+ "bits": 16
794
+ },
795
+ "model.language_model.layers.34.mlp.shared_expert.gate_proj": {
796
+ "bits": 16
797
+ },
798
+ "model.language_model.layers.34.mlp.shared_expert.up_proj": {
799
+ "bits": 16
800
+ },
801
+ "model.language_model.layers.34.mlp.shared_expert_gate": {
802
+ "bits": 16,
803
+ "data_type": "fp"
804
+ },
805
+ "model.language_model.layers.35.mlp.shared_expert.down_proj": {
806
+ "bits": 16
807
+ },
808
+ "model.language_model.layers.35.mlp.shared_expert.gate_proj": {
809
+ "bits": 16
810
+ },
811
+ "model.language_model.layers.35.mlp.shared_expert.up_proj": {
812
+ "bits": 16
813
+ },
814
+ "model.language_model.layers.35.mlp.shared_expert_gate": {
815
+ "bits": 16,
816
+ "data_type": "fp"
817
+ },
818
+ "model.language_model.layers.35.self_attn.k_proj": {
819
+ "bits": 16
820
+ },
821
+ "model.language_model.layers.35.self_attn.o_proj": {
822
+ "bits": 16
823
+ },
824
+ "model.language_model.layers.35.self_attn.q_proj": {
825
+ "bits": 16
826
+ },
827
+ "model.language_model.layers.35.self_attn.v_proj": {
828
+ "bits": 16
829
+ },
830
+ "model.language_model.layers.36.linear_attn.in_proj_a": {
831
+ "bits": 16
832
+ },
833
+ "model.language_model.layers.36.linear_attn.in_proj_b": {
834
+ "bits": 16
835
+ },
836
+ "model.language_model.layers.36.linear_attn.in_proj_qkv": {
837
+ "bits": 16
838
+ },
839
+ "model.language_model.layers.36.linear_attn.in_proj_z": {
840
+ "bits": 16
841
+ },
842
+ "model.language_model.layers.36.linear_attn.out_proj": {
843
+ "bits": 16
844
+ },
845
+ "model.language_model.layers.36.mlp.shared_expert.down_proj": {
846
+ "bits": 16
847
+ },
848
+ "model.language_model.layers.36.mlp.shared_expert.gate_proj": {
849
+ "bits": 16
850
+ },
851
+ "model.language_model.layers.36.mlp.shared_expert.up_proj": {
852
+ "bits": 16
853
+ },
854
+ "model.language_model.layers.36.mlp.shared_expert_gate": {
855
+ "bits": 16,
856
+ "data_type": "fp"
857
+ },
858
+ "model.language_model.layers.37.linear_attn.in_proj_a": {
859
+ "bits": 16
860
+ },
861
+ "model.language_model.layers.37.linear_attn.in_proj_b": {
862
+ "bits": 16
863
+ },
864
+ "model.language_model.layers.37.linear_attn.in_proj_qkv": {
865
+ "bits": 16
866
+ },
867
+ "model.language_model.layers.37.linear_attn.in_proj_z": {
868
+ "bits": 16
869
+ },
870
+ "model.language_model.layers.37.linear_attn.out_proj": {
871
+ "bits": 16
872
+ },
873
+ "model.language_model.layers.37.mlp.shared_expert.down_proj": {
874
+ "bits": 16
875
+ },
876
+ "model.language_model.layers.37.mlp.shared_expert.gate_proj": {
877
+ "bits": 16
878
+ },
879
+ "model.language_model.layers.37.mlp.shared_expert.up_proj": {
880
+ "bits": 16
881
+ },
882
+ "model.language_model.layers.37.mlp.shared_expert_gate": {
883
+ "bits": 16,
884
+ "data_type": "fp"
885
+ },
886
+ "model.language_model.layers.38.linear_attn.in_proj_a": {
887
+ "bits": 16
888
+ },
889
+ "model.language_model.layers.38.linear_attn.in_proj_b": {
890
+ "bits": 16
891
+ },
892
+ "model.language_model.layers.38.linear_attn.in_proj_qkv": {
893
+ "bits": 16
894
+ },
895
+ "model.language_model.layers.38.linear_attn.in_proj_z": {
896
+ "bits": 16
897
+ },
898
+ "model.language_model.layers.38.linear_attn.out_proj": {
899
+ "bits": 16
900
+ },
901
+ "model.language_model.layers.38.mlp.shared_expert.down_proj": {
902
+ "bits": 16
903
+ },
904
+ "model.language_model.layers.38.mlp.shared_expert.gate_proj": {
905
+ "bits": 16
906
+ },
907
+ "model.language_model.layers.38.mlp.shared_expert.up_proj": {
908
+ "bits": 16
909
+ },
910
+ "model.language_model.layers.38.mlp.shared_expert_gate": {
911
+ "bits": 16,
912
+ "data_type": "fp"
913
+ },
914
+ "model.language_model.layers.39.mlp.shared_expert.down_proj": {
915
+ "bits": 16
916
+ },
917
+ "model.language_model.layers.39.mlp.shared_expert.gate_proj": {
918
+ "bits": 16
919
+ },
920
+ "model.language_model.layers.39.mlp.shared_expert.up_proj": {
921
+ "bits": 16
922
+ },
923
+ "model.language_model.layers.39.mlp.shared_expert_gate": {
924
+ "bits": 16,
925
+ "data_type": "fp"
926
+ },
927
+ "model.language_model.layers.39.self_attn.k_proj": {
928
+ "bits": 16
929
+ },
930
+ "model.language_model.layers.39.self_attn.o_proj": {
931
+ "bits": 16
932
+ },
933
+ "model.language_model.layers.39.self_attn.q_proj": {
934
+ "bits": 16
935
+ },
936
+ "model.language_model.layers.39.self_attn.v_proj": {
937
+ "bits": 16
938
+ },
939
+ "model.language_model.layers.4.linear_attn.in_proj_a": {
940
+ "bits": 16
941
+ },
942
+ "model.language_model.layers.4.linear_attn.in_proj_b": {
943
+ "bits": 16
944
+ },
945
+ "model.language_model.layers.4.linear_attn.in_proj_qkv": {
946
+ "bits": 16
947
+ },
948
+ "model.language_model.layers.4.linear_attn.in_proj_z": {
949
+ "bits": 16
950
+ },
951
+ "model.language_model.layers.4.linear_attn.out_proj": {
952
+ "bits": 16
953
+ },
954
+ "model.language_model.layers.4.mlp.shared_expert.down_proj": {
955
+ "bits": 16
956
+ },
957
+ "model.language_model.layers.4.mlp.shared_expert.gate_proj": {
958
+ "bits": 16
959
+ },
960
+ "model.language_model.layers.4.mlp.shared_expert.up_proj": {
961
+ "bits": 16
962
+ },
963
+ "model.language_model.layers.4.mlp.shared_expert_gate": {
964
+ "bits": 16,
965
+ "data_type": "fp"
966
+ },
967
+ "model.language_model.layers.40.linear_attn.in_proj_a": {
968
+ "bits": 16
969
+ },
970
+ "model.language_model.layers.40.linear_attn.in_proj_b": {
971
+ "bits": 16
972
+ },
973
+ "model.language_model.layers.40.linear_attn.in_proj_qkv": {
974
+ "bits": 16
975
+ },
976
+ "model.language_model.layers.40.linear_attn.in_proj_z": {
977
+ "bits": 16
978
+ },
979
+ "model.language_model.layers.40.linear_attn.out_proj": {
980
+ "bits": 16
981
+ },
982
+ "model.language_model.layers.40.mlp.shared_expert.down_proj": {
983
+ "bits": 16
984
+ },
985
+ "model.language_model.layers.40.mlp.shared_expert.gate_proj": {
986
+ "bits": 16
987
+ },
988
+ "model.language_model.layers.40.mlp.shared_expert.up_proj": {
989
+ "bits": 16
990
+ },
991
+ "model.language_model.layers.40.mlp.shared_expert_gate": {
992
+ "bits": 16,
993
+ "data_type": "fp"
994
+ },
995
+ "model.language_model.layers.41.linear_attn.in_proj_a": {
996
+ "bits": 16
997
+ },
998
+ "model.language_model.layers.41.linear_attn.in_proj_b": {
999
+ "bits": 16
1000
+ },
1001
+ "model.language_model.layers.41.linear_attn.in_proj_qkv": {
1002
+ "bits": 16
1003
+ },
1004
+ "model.language_model.layers.41.linear_attn.in_proj_z": {
1005
+ "bits": 16
1006
+ },
1007
+ "model.language_model.layers.41.linear_attn.out_proj": {
1008
+ "bits": 16
1009
+ },
1010
+ "model.language_model.layers.41.mlp.shared_expert.down_proj": {
1011
+ "bits": 16
1012
+ },
1013
+ "model.language_model.layers.41.mlp.shared_expert.gate_proj": {
1014
+ "bits": 16
1015
+ },
1016
+ "model.language_model.layers.41.mlp.shared_expert.up_proj": {
1017
+ "bits": 16
1018
+ },
1019
+ "model.language_model.layers.41.mlp.shared_expert_gate": {
1020
+ "bits": 16,
1021
+ "data_type": "fp"
1022
+ },
1023
+ "model.language_model.layers.42.linear_attn.in_proj_a": {
1024
+ "bits": 16
1025
+ },
1026
+ "model.language_model.layers.42.linear_attn.in_proj_b": {
1027
+ "bits": 16
1028
+ },
1029
+ "model.language_model.layers.42.linear_attn.in_proj_qkv": {
1030
+ "bits": 16
1031
+ },
1032
+ "model.language_model.layers.42.linear_attn.in_proj_z": {
1033
+ "bits": 16
1034
+ },
1035
+ "model.language_model.layers.42.linear_attn.out_proj": {
1036
+ "bits": 16
1037
+ },
1038
+ "model.language_model.layers.42.mlp.shared_expert.down_proj": {
1039
+ "bits": 16
1040
+ },
1041
+ "model.language_model.layers.42.mlp.shared_expert.gate_proj": {
1042
+ "bits": 16
1043
+ },
1044
+ "model.language_model.layers.42.mlp.shared_expert.up_proj": {
1045
+ "bits": 16
1046
+ },
1047
+ "model.language_model.layers.42.mlp.shared_expert_gate": {
1048
+ "bits": 16,
1049
+ "data_type": "fp"
1050
+ },
1051
+ "model.language_model.layers.43.mlp.shared_expert.down_proj": {
1052
+ "bits": 16
1053
+ },
1054
+ "model.language_model.layers.43.mlp.shared_expert.gate_proj": {
1055
+ "bits": 16
1056
+ },
1057
+ "model.language_model.layers.43.mlp.shared_expert.up_proj": {
1058
+ "bits": 16
1059
+ },
1060
+ "model.language_model.layers.43.mlp.shared_expert_gate": {
1061
+ "bits": 16,
1062
+ "data_type": "fp"
1063
+ },
1064
+ "model.language_model.layers.43.self_attn.k_proj": {
1065
+ "bits": 16
1066
+ },
1067
+ "model.language_model.layers.43.self_attn.o_proj": {
1068
+ "bits": 16
1069
+ },
1070
+ "model.language_model.layers.43.self_attn.q_proj": {
1071
+ "bits": 16
1072
+ },
1073
+ "model.language_model.layers.43.self_attn.v_proj": {
1074
+ "bits": 16
1075
+ },
1076
+ "model.language_model.layers.44.linear_attn.in_proj_a": {
1077
+ "bits": 16
1078
+ },
1079
+ "model.language_model.layers.44.linear_attn.in_proj_b": {
1080
+ "bits": 16
1081
+ },
1082
+ "model.language_model.layers.44.linear_attn.in_proj_qkv": {
1083
+ "bits": 16
1084
+ },
1085
+ "model.language_model.layers.44.linear_attn.in_proj_z": {
1086
+ "bits": 16
1087
+ },
1088
+ "model.language_model.layers.44.linear_attn.out_proj": {
1089
+ "bits": 16
1090
+ },
1091
+ "model.language_model.layers.44.mlp.shared_expert.down_proj": {
1092
+ "bits": 16
1093
+ },
1094
+ "model.language_model.layers.44.mlp.shared_expert.gate_proj": {
1095
+ "bits": 16
1096
+ },
1097
+ "model.language_model.layers.44.mlp.shared_expert.up_proj": {
1098
+ "bits": 16
1099
+ },
1100
+ "model.language_model.layers.44.mlp.shared_expert_gate": {
1101
+ "bits": 16,
1102
+ "data_type": "fp"
1103
+ },
1104
+ "model.language_model.layers.45.linear_attn.in_proj_a": {
1105
+ "bits": 16
1106
+ },
1107
+ "model.language_model.layers.45.linear_attn.in_proj_b": {
1108
+ "bits": 16
1109
+ },
1110
+ "model.language_model.layers.45.linear_attn.in_proj_qkv": {
1111
+ "bits": 16
1112
+ },
1113
+ "model.language_model.layers.45.linear_attn.in_proj_z": {
1114
+ "bits": 16
1115
+ },
1116
+ "model.language_model.layers.45.linear_attn.out_proj": {
1117
+ "bits": 16
1118
+ },
1119
+ "model.language_model.layers.45.mlp.shared_expert.down_proj": {
1120
+ "bits": 16
1121
+ },
1122
+ "model.language_model.layers.45.mlp.shared_expert.gate_proj": {
1123
+ "bits": 16
1124
+ },
1125
+ "model.language_model.layers.45.mlp.shared_expert.up_proj": {
1126
+ "bits": 16
1127
+ },
1128
+ "model.language_model.layers.45.mlp.shared_expert_gate": {
1129
+ "bits": 16,
1130
+ "data_type": "fp"
1131
+ },
1132
+ "model.language_model.layers.46.linear_attn.in_proj_a": {
1133
+ "bits": 16
1134
+ },
1135
+ "model.language_model.layers.46.linear_attn.in_proj_b": {
1136
+ "bits": 16
1137
+ },
1138
+ "model.language_model.layers.46.linear_attn.in_proj_qkv": {
1139
+ "bits": 16
1140
+ },
1141
+ "model.language_model.layers.46.linear_attn.in_proj_z": {
1142
+ "bits": 16
1143
+ },
1144
+ "model.language_model.layers.46.linear_attn.out_proj": {
1145
+ "bits": 16
1146
+ },
1147
+ "model.language_model.layers.46.mlp.shared_expert.down_proj": {
1148
+ "bits": 16
1149
+ },
1150
+ "model.language_model.layers.46.mlp.shared_expert.gate_proj": {
1151
+ "bits": 16
1152
+ },
1153
+ "model.language_model.layers.46.mlp.shared_expert.up_proj": {
1154
+ "bits": 16
1155
+ },
1156
+ "model.language_model.layers.46.mlp.shared_expert_gate": {
1157
+ "bits": 16,
1158
+ "data_type": "fp"
1159
+ },
1160
+ "model.language_model.layers.47.mlp.shared_expert.down_proj": {
1161
+ "bits": 16
1162
+ },
1163
+ "model.language_model.layers.47.mlp.shared_expert.gate_proj": {
1164
+ "bits": 16
1165
+ },
1166
+ "model.language_model.layers.47.mlp.shared_expert.up_proj": {
1167
+ "bits": 16
1168
+ },
1169
+ "model.language_model.layers.47.mlp.shared_expert_gate": {
1170
+ "bits": 16,
1171
+ "data_type": "fp"
1172
+ },
1173
+ "model.language_model.layers.47.self_attn.k_proj": {
1174
+ "bits": 16
1175
+ },
1176
+ "model.language_model.layers.47.self_attn.o_proj": {
1177
+ "bits": 16
1178
+ },
1179
+ "model.language_model.layers.47.self_attn.q_proj": {
1180
+ "bits": 16
1181
+ },
1182
+ "model.language_model.layers.47.self_attn.v_proj": {
1183
+ "bits": 16
1184
+ },
1185
+ "model.language_model.layers.48.linear_attn.in_proj_a": {
1186
+ "bits": 16
1187
+ },
1188
+ "model.language_model.layers.48.linear_attn.in_proj_b": {
1189
+ "bits": 16
1190
+ },
1191
+ "model.language_model.layers.48.linear_attn.in_proj_qkv": {
1192
+ "bits": 16
1193
+ },
1194
+ "model.language_model.layers.48.linear_attn.in_proj_z": {
1195
+ "bits": 16
1196
+ },
1197
+ "model.language_model.layers.48.linear_attn.out_proj": {
1198
+ "bits": 16
1199
+ },
1200
+ "model.language_model.layers.48.mlp.shared_expert.down_proj": {
1201
+ "bits": 16
1202
+ },
1203
+ "model.language_model.layers.48.mlp.shared_expert.gate_proj": {
1204
+ "bits": 16
1205
+ },
1206
+ "model.language_model.layers.48.mlp.shared_expert.up_proj": {
1207
+ "bits": 16
1208
+ },
1209
+ "model.language_model.layers.48.mlp.shared_expert_gate": {
1210
+ "bits": 16,
1211
+ "data_type": "fp"
1212
+ },
1213
+ "model.language_model.layers.49.linear_attn.in_proj_a": {
1214
+ "bits": 16
1215
+ },
1216
+ "model.language_model.layers.49.linear_attn.in_proj_b": {
1217
+ "bits": 16
1218
+ },
1219
+ "model.language_model.layers.49.linear_attn.in_proj_qkv": {
1220
+ "bits": 16
1221
+ },
1222
+ "model.language_model.layers.49.linear_attn.in_proj_z": {
1223
+ "bits": 16
1224
+ },
1225
+ "model.language_model.layers.49.linear_attn.out_proj": {
1226
+ "bits": 16
1227
+ },
1228
+ "model.language_model.layers.49.mlp.shared_expert.down_proj": {
1229
+ "bits": 16
1230
+ },
1231
+ "model.language_model.layers.49.mlp.shared_expert.gate_proj": {
1232
+ "bits": 16
1233
+ },
1234
+ "model.language_model.layers.49.mlp.shared_expert.up_proj": {
1235
+ "bits": 16
1236
+ },
1237
+ "model.language_model.layers.49.mlp.shared_expert_gate": {
1238
+ "bits": 16,
1239
+ "data_type": "fp"
1240
+ },
1241
+ "model.language_model.layers.5.linear_attn.in_proj_a": {
1242
+ "bits": 16
1243
+ },
1244
+ "model.language_model.layers.5.linear_attn.in_proj_b": {
1245
+ "bits": 16
1246
+ },
1247
+ "model.language_model.layers.5.linear_attn.in_proj_qkv": {
1248
+ "bits": 16
1249
+ },
1250
+ "model.language_model.layers.5.linear_attn.in_proj_z": {
1251
+ "bits": 16
1252
+ },
1253
+ "model.language_model.layers.5.linear_attn.out_proj": {
1254
+ "bits": 16
1255
+ },
1256
+ "model.language_model.layers.5.mlp.shared_expert.down_proj": {
1257
+ "bits": 16
1258
+ },
1259
+ "model.language_model.layers.5.mlp.shared_expert.gate_proj": {
1260
+ "bits": 16
1261
+ },
1262
+ "model.language_model.layers.5.mlp.shared_expert.up_proj": {
1263
+ "bits": 16
1264
+ },
1265
+ "model.language_model.layers.5.mlp.shared_expert_gate": {
1266
+ "bits": 16,
1267
+ "data_type": "fp"
1268
+ },
1269
+ "model.language_model.layers.50.linear_attn.in_proj_a": {
1270
+ "bits": 16
1271
+ },
1272
+ "model.language_model.layers.50.linear_attn.in_proj_b": {
1273
+ "bits": 16
1274
+ },
1275
+ "model.language_model.layers.50.linear_attn.in_proj_qkv": {
1276
+ "bits": 16
1277
+ },
1278
+ "model.language_model.layers.50.linear_attn.in_proj_z": {
1279
+ "bits": 16
1280
+ },
1281
+ "model.language_model.layers.50.linear_attn.out_proj": {
1282
+ "bits": 16
1283
+ },
1284
+ "model.language_model.layers.50.mlp.shared_expert.down_proj": {
1285
+ "bits": 16
1286
+ },
1287
+ "model.language_model.layers.50.mlp.shared_expert.gate_proj": {
1288
+ "bits": 16
1289
+ },
1290
+ "model.language_model.layers.50.mlp.shared_expert.up_proj": {
1291
+ "bits": 16
1292
+ },
1293
+ "model.language_model.layers.50.mlp.shared_expert_gate": {
1294
+ "bits": 16,
1295
+ "data_type": "fp"
1296
+ },
1297
+ "model.language_model.layers.51.mlp.shared_expert.down_proj": {
1298
+ "bits": 16
1299
+ },
1300
+ "model.language_model.layers.51.mlp.shared_expert.gate_proj": {
1301
+ "bits": 16
1302
+ },
1303
+ "model.language_model.layers.51.mlp.shared_expert.up_proj": {
1304
+ "bits": 16
1305
+ },
1306
+ "model.language_model.layers.51.mlp.shared_expert_gate": {
1307
+ "bits": 16,
1308
+ "data_type": "fp"
1309
+ },
1310
+ "model.language_model.layers.51.self_attn.k_proj": {
1311
+ "bits": 16
1312
+ },
1313
+ "model.language_model.layers.51.self_attn.o_proj": {
1314
+ "bits": 16
1315
+ },
1316
+ "model.language_model.layers.51.self_attn.q_proj": {
1317
+ "bits": 16
1318
+ },
1319
+ "model.language_model.layers.51.self_attn.v_proj": {
1320
+ "bits": 16
1321
+ },
1322
+ "model.language_model.layers.52.linear_attn.in_proj_a": {
1323
+ "bits": 16
1324
+ },
1325
+ "model.language_model.layers.52.linear_attn.in_proj_b": {
1326
+ "bits": 16
1327
+ },
1328
+ "model.language_model.layers.52.linear_attn.in_proj_qkv": {
1329
+ "bits": 16
1330
+ },
1331
+ "model.language_model.layers.52.linear_attn.in_proj_z": {
1332
+ "bits": 16
1333
+ },
1334
+ "model.language_model.layers.52.linear_attn.out_proj": {
1335
+ "bits": 16
1336
+ },
1337
+ "model.language_model.layers.52.mlp.shared_expert.down_proj": {
1338
+ "bits": 16
1339
+ },
1340
+ "model.language_model.layers.52.mlp.shared_expert.gate_proj": {
1341
+ "bits": 16
1342
+ },
1343
+ "model.language_model.layers.52.mlp.shared_expert.up_proj": {
1344
+ "bits": 16
1345
+ },
1346
+ "model.language_model.layers.52.mlp.shared_expert_gate": {
1347
+ "bits": 16,
1348
+ "data_type": "fp"
1349
+ },
1350
+ "model.language_model.layers.53.linear_attn.in_proj_a": {
1351
+ "bits": 16
1352
+ },
1353
+ "model.language_model.layers.53.linear_attn.in_proj_b": {
1354
+ "bits": 16
1355
+ },
1356
+ "model.language_model.layers.53.linear_attn.in_proj_qkv": {
1357
+ "bits": 16
1358
+ },
1359
+ "model.language_model.layers.53.linear_attn.in_proj_z": {
1360
+ "bits": 16
1361
+ },
1362
+ "model.language_model.layers.53.linear_attn.out_proj": {
1363
+ "bits": 16
1364
+ },
1365
+ "model.language_model.layers.53.mlp.shared_expert.down_proj": {
1366
+ "bits": 16
1367
+ },
1368
+ "model.language_model.layers.53.mlp.shared_expert.gate_proj": {
1369
+ "bits": 16
1370
+ },
1371
+ "model.language_model.layers.53.mlp.shared_expert.up_proj": {
1372
+ "bits": 16
1373
+ },
1374
+ "model.language_model.layers.53.mlp.shared_expert_gate": {
1375
+ "bits": 16,
1376
+ "data_type": "fp"
1377
+ },
1378
+ "model.language_model.layers.54.linear_attn.in_proj_a": {
1379
+ "bits": 16
1380
+ },
1381
+ "model.language_model.layers.54.linear_attn.in_proj_b": {
1382
+ "bits": 16
1383
+ },
1384
+ "model.language_model.layers.54.linear_attn.in_proj_qkv": {
1385
+ "bits": 16
1386
+ },
1387
+ "model.language_model.layers.54.linear_attn.in_proj_z": {
1388
+ "bits": 16
1389
+ },
1390
+ "model.language_model.layers.54.linear_attn.out_proj": {
1391
+ "bits": 16
1392
+ },
1393
+ "model.language_model.layers.54.mlp.shared_expert.down_proj": {
1394
+ "bits": 16
1395
+ },
1396
+ "model.language_model.layers.54.mlp.shared_expert.gate_proj": {
1397
+ "bits": 16
1398
+ },
1399
+ "model.language_model.layers.54.mlp.shared_expert.up_proj": {
1400
+ "bits": 16
1401
+ },
1402
+ "model.language_model.layers.54.mlp.shared_expert_gate": {
1403
+ "bits": 16,
1404
+ "data_type": "fp"
1405
+ },
1406
+ "model.language_model.layers.55.mlp.shared_expert.down_proj": {
1407
+ "bits": 16
1408
+ },
1409
+ "model.language_model.layers.55.mlp.shared_expert.gate_proj": {
1410
+ "bits": 16
1411
+ },
1412
+ "model.language_model.layers.55.mlp.shared_expert.up_proj": {
1413
+ "bits": 16
1414
+ },
1415
+ "model.language_model.layers.55.mlp.shared_expert_gate": {
1416
+ "bits": 16,
1417
+ "data_type": "fp"
1418
+ },
1419
+ "model.language_model.layers.55.self_attn.k_proj": {
1420
+ "bits": 16
1421
+ },
1422
+ "model.language_model.layers.55.self_attn.o_proj": {
1423
+ "bits": 16
1424
+ },
1425
+ "model.language_model.layers.55.self_attn.q_proj": {
1426
+ "bits": 16
1427
+ },
1428
+ "model.language_model.layers.55.self_attn.v_proj": {
1429
+ "bits": 16
1430
+ },
1431
+ "model.language_model.layers.56.linear_attn.in_proj_a": {
1432
+ "bits": 16
1433
+ },
1434
+ "model.language_model.layers.56.linear_attn.in_proj_b": {
1435
+ "bits": 16
1436
+ },
1437
+ "model.language_model.layers.56.linear_attn.in_proj_qkv": {
1438
+ "bits": 16
1439
+ },
1440
+ "model.language_model.layers.56.linear_attn.in_proj_z": {
1441
+ "bits": 16
1442
+ },
1443
+ "model.language_model.layers.56.linear_attn.out_proj": {
1444
+ "bits": 16
1445
+ },
1446
+ "model.language_model.layers.56.mlp.shared_expert.down_proj": {
1447
+ "bits": 16
1448
+ },
1449
+ "model.language_model.layers.56.mlp.shared_expert.gate_proj": {
1450
+ "bits": 16
1451
+ },
1452
+ "model.language_model.layers.56.mlp.shared_expert.up_proj": {
1453
+ "bits": 16
1454
+ },
1455
+ "model.language_model.layers.56.mlp.shared_expert_gate": {
1456
+ "bits": 16,
1457
+ "data_type": "fp"
1458
+ },
1459
+ "model.language_model.layers.57.linear_attn.in_proj_a": {
1460
+ "bits": 16
1461
+ },
1462
+ "model.language_model.layers.57.linear_attn.in_proj_b": {
1463
+ "bits": 16
1464
+ },
1465
+ "model.language_model.layers.57.linear_attn.in_proj_qkv": {
1466
+ "bits": 16
1467
+ },
1468
+ "model.language_model.layers.57.linear_attn.in_proj_z": {
1469
+ "bits": 16
1470
+ },
1471
+ "model.language_model.layers.57.linear_attn.out_proj": {
1472
+ "bits": 16
1473
+ },
1474
+ "model.language_model.layers.57.mlp.shared_expert.down_proj": {
1475
+ "bits": 16
1476
+ },
1477
+ "model.language_model.layers.57.mlp.shared_expert.gate_proj": {
1478
+ "bits": 16
1479
+ },
1480
+ "model.language_model.layers.57.mlp.shared_expert.up_proj": {
1481
+ "bits": 16
1482
+ },
1483
+ "model.language_model.layers.57.mlp.shared_expert_gate": {
1484
+ "bits": 16,
1485
+ "data_type": "fp"
1486
+ },
1487
+ "model.language_model.layers.58.linear_attn.in_proj_a": {
1488
+ "bits": 16
1489
+ },
1490
+ "model.language_model.layers.58.linear_attn.in_proj_b": {
1491
+ "bits": 16
1492
+ },
1493
+ "model.language_model.layers.58.linear_attn.in_proj_qkv": {
1494
+ "bits": 16
1495
+ },
1496
+ "model.language_model.layers.58.linear_attn.in_proj_z": {
1497
+ "bits": 16
1498
+ },
1499
+ "model.language_model.layers.58.linear_attn.out_proj": {
1500
+ "bits": 16
1501
+ },
1502
+ "model.language_model.layers.58.mlp.shared_expert.down_proj": {
1503
+ "bits": 16
1504
+ },
1505
+ "model.language_model.layers.58.mlp.shared_expert.gate_proj": {
1506
+ "bits": 16
1507
+ },
1508
+ "model.language_model.layers.58.mlp.shared_expert.up_proj": {
1509
+ "bits": 16
1510
+ },
1511
+ "model.language_model.layers.58.mlp.shared_expert_gate": {
1512
+ "bits": 16,
1513
+ "data_type": "fp"
1514
+ },
1515
+ "model.language_model.layers.59.mlp.shared_expert.down_proj": {
1516
+ "bits": 16
1517
+ },
1518
+ "model.language_model.layers.59.mlp.shared_expert.gate_proj": {
1519
+ "bits": 16
1520
+ },
1521
+ "model.language_model.layers.59.mlp.shared_expert.up_proj": {
1522
+ "bits": 16
1523
+ },
1524
+ "model.language_model.layers.59.mlp.shared_expert_gate": {
1525
+ "bits": 16,
1526
+ "data_type": "fp"
1527
+ },
1528
+ "model.language_model.layers.59.self_attn.k_proj": {
1529
+ "bits": 16
1530
+ },
1531
+ "model.language_model.layers.59.self_attn.o_proj": {
1532
+ "bits": 16
1533
+ },
1534
+ "model.language_model.layers.59.self_attn.q_proj": {
1535
+ "bits": 16
1536
+ },
1537
+ "model.language_model.layers.59.self_attn.v_proj": {
1538
+ "bits": 16
1539
+ },
1540
+ "model.language_model.layers.6.linear_attn.in_proj_a": {
1541
+ "bits": 16
1542
+ },
1543
+ "model.language_model.layers.6.linear_attn.in_proj_b": {
1544
+ "bits": 16
1545
+ },
1546
+ "model.language_model.layers.6.linear_attn.in_proj_qkv": {
1547
+ "bits": 16
1548
+ },
1549
+ "model.language_model.layers.6.linear_attn.in_proj_z": {
1550
+ "bits": 16
1551
+ },
1552
+ "model.language_model.layers.6.linear_attn.out_proj": {
1553
+ "bits": 16
1554
+ },
1555
+ "model.language_model.layers.6.mlp.shared_expert.down_proj": {
1556
+ "bits": 16
1557
+ },
1558
+ "model.language_model.layers.6.mlp.shared_expert.gate_proj": {
1559
+ "bits": 16
1560
+ },
1561
+ "model.language_model.layers.6.mlp.shared_expert.up_proj": {
1562
+ "bits": 16
1563
+ },
1564
+ "model.language_model.layers.6.mlp.shared_expert_gate": {
1565
+ "bits": 16,
1566
+ "data_type": "fp"
1567
+ },
1568
+ "model.language_model.layers.7.mlp.shared_expert.down_proj": {
1569
+ "bits": 16
1570
+ },
1571
+ "model.language_model.layers.7.mlp.shared_expert.gate_proj": {
1572
+ "bits": 16
1573
+ },
1574
+ "model.language_model.layers.7.mlp.shared_expert.up_proj": {
1575
+ "bits": 16
1576
+ },
1577
+ "model.language_model.layers.7.mlp.shared_expert_gate": {
1578
+ "bits": 16,
1579
+ "data_type": "fp"
1580
+ },
1581
+ "model.language_model.layers.7.self_attn.k_proj": {
1582
+ "bits": 16
1583
+ },
1584
+ "model.language_model.layers.7.self_attn.o_proj": {
1585
+ "bits": 16
1586
+ },
1587
+ "model.language_model.layers.7.self_attn.q_proj": {
1588
+ "bits": 16
1589
+ },
1590
+ "model.language_model.layers.7.self_attn.v_proj": {
1591
+ "bits": 16
1592
+ },
1593
+ "model.language_model.layers.8.linear_attn.in_proj_a": {
1594
+ "bits": 16
1595
+ },
1596
+ "model.language_model.layers.8.linear_attn.in_proj_b": {
1597
+ "bits": 16
1598
+ },
1599
+ "model.language_model.layers.8.linear_attn.in_proj_qkv": {
1600
+ "bits": 16
1601
+ },
1602
+ "model.language_model.layers.8.linear_attn.in_proj_z": {
1603
+ "bits": 16
1604
+ },
1605
+ "model.language_model.layers.8.linear_attn.out_proj": {
1606
+ "bits": 16
1607
+ },
1608
+ "model.language_model.layers.8.mlp.shared_expert.down_proj": {
1609
+ "bits": 16
1610
+ },
1611
+ "model.language_model.layers.8.mlp.shared_expert.gate_proj": {
1612
+ "bits": 16
1613
+ },
1614
+ "model.language_model.layers.8.mlp.shared_expert.up_proj": {
1615
+ "bits": 16
1616
+ },
1617
+ "model.language_model.layers.8.mlp.shared_expert_gate": {
1618
+ "bits": 16,
1619
+ "data_type": "fp"
1620
+ },
1621
+ "model.language_model.layers.9.linear_attn.in_proj_a": {
1622
+ "bits": 16
1623
+ },
1624
+ "model.language_model.layers.9.linear_attn.in_proj_b": {
1625
+ "bits": 16
1626
+ },
1627
+ "model.language_model.layers.9.linear_attn.in_proj_qkv": {
1628
+ "bits": 16
1629
+ },
1630
+ "model.language_model.layers.9.linear_attn.in_proj_z": {
1631
+ "bits": 16
1632
+ },
1633
+ "model.language_model.layers.9.linear_attn.out_proj": {
1634
+ "bits": 16
1635
+ },
1636
+ "model.language_model.layers.9.mlp.shared_expert.down_proj": {
1637
+ "bits": 16
1638
+ },
1639
+ "model.language_model.layers.9.mlp.shared_expert.gate_proj": {
1640
+ "bits": 16
1641
+ },
1642
+ "model.language_model.layers.9.mlp.shared_expert.up_proj": {
1643
+ "bits": 16
1644
+ },
1645
+ "model.language_model.layers.9.mlp.shared_expert_gate": {
1646
+ "bits": 16,
1647
+ "data_type": "fp"
1648
+ }
1649
+ },
1650
+ "group_size": 128,
1651
+ "iters": 0,
1652
+ "packing_format": "auto_round:auto_gptq",
1653
+ "quant_method": "auto-round",
1654
+ "sym": true
1655
+ },
1656
+ "text_config": {
1657
+ "attention_bias": false,
1658
+ "attention_dropout": 0.0,
1659
+ "attn_output_gate": true,
1660
+ "bos_token_id": null,
1661
+ "dtype": "bfloat16",
1662
+ "eos_token_id": 248044,
1663
+ "full_attention_interval": 4,
1664
+ "head_dim": 256,
1665
+ "hidden_act": "silu",
1666
+ "hidden_size": 4096,
1667
+ "initializer_range": 0.02,
1668
+ "layer_types": [
1669
+ "linear_attention",
1670
+ "linear_attention",
1671
+ "linear_attention",
1672
+ "full_attention",
1673
+ "linear_attention",
1674
+ "linear_attention",
1675
+ "linear_attention",
1676
+ "full_attention",
1677
+ "linear_attention",
1678
+ "linear_attention",
1679
+ "linear_attention",
1680
+ "full_attention",
1681
+ "linear_attention",
1682
+ "linear_attention",
1683
+ "linear_attention",
1684
+ "full_attention",
1685
+ "linear_attention",
1686
+ "linear_attention",
1687
+ "linear_attention",
1688
+ "full_attention",
1689
+ "linear_attention",
1690
+ "linear_attention",
1691
+ "linear_attention",
1692
+ "full_attention",
1693
+ "linear_attention",
1694
+ "linear_attention",
1695
+ "linear_attention",
1696
+ "full_attention",
1697
+ "linear_attention",
1698
+ "linear_attention",
1699
+ "linear_attention",
1700
+ "full_attention",
1701
+ "linear_attention",
1702
+ "linear_attention",
1703
+ "linear_attention",
1704
+ "full_attention",
1705
+ "linear_attention",
1706
+ "linear_attention",
1707
+ "linear_attention",
1708
+ "full_attention",
1709
+ "linear_attention",
1710
+ "linear_attention",
1711
+ "linear_attention",
1712
+ "full_attention",
1713
+ "linear_attention",
1714
+ "linear_attention",
1715
+ "linear_attention",
1716
+ "full_attention",
1717
+ "linear_attention",
1718
+ "linear_attention",
1719
+ "linear_attention",
1720
+ "full_attention",
1721
+ "linear_attention",
1722
+ "linear_attention",
1723
+ "linear_attention",
1724
+ "full_attention",
1725
+ "linear_attention",
1726
+ "linear_attention",
1727
+ "linear_attention",
1728
+ "full_attention"
1729
+ ],
1730
+ "linear_conv_kernel_dim": 4,
1731
+ "linear_key_head_dim": 128,
1732
+ "linear_num_key_heads": 16,
1733
+ "linear_num_value_heads": 64,
1734
+ "linear_value_head_dim": 128,
1735
+ "mamba_ssm_dtype": "float32",
1736
+ "max_position_embeddings": 262144,
1737
+ "mlp_only_layers": [],
1738
+ "model_type": "qwen3_5_moe_text",
1739
+ "moe_intermediate_size": 1024,
1740
+ "mtp_num_hidden_layers": 1,
1741
+ "mtp_use_dedicated_embeddings": false,
1742
+ "num_attention_heads": 32,
1743
+ "num_experts": 512,
1744
+ "num_experts_per_tok": 10,
1745
+ "num_hidden_layers": 60,
1746
+ "num_key_value_heads": 2,
1747
+ "output_router_logits": false,
1748
+ "pad_token_id": null,
1749
+ "partial_rotary_factor": 0.25,
1750
+ "rms_norm_eps": 1e-06,
1751
+ "rope_parameters": {
1752
+ "mrope_interleaved": true,
1753
+ "mrope_section": [
1754
+ 11,
1755
+ 11,
1756
+ 10
1757
+ ],
1758
+ "partial_rotary_factor": 0.25,
1759
+ "rope_theta": 10000000,
1760
+ "rope_type": "default"
1761
+ },
1762
+ "router_aux_loss_coef": 0.001,
1763
+ "shared_expert_intermediate_size": 1024,
1764
+ "tie_word_embeddings": false,
1765
+ "use_cache": true,
1766
+ "vocab_size": 248320
1767
+ },
1768
+ "tie_word_embeddings": false,
1769
+ "transformers_version": "5.2.0.dev0",
1770
+ "video_token_id": 248057,
1771
+ "vision_config": {
1772
+ "deepstack_visual_indexes": [],
1773
+ "depth": 27,
1774
+ "dtype": "bfloat16",
1775
+ "hidden_act": "gelu_pytorch_tanh",
1776
+ "hidden_size": 1152,
1777
+ "in_channels": 3,
1778
+ "initializer_range": 0.02,
1779
+ "intermediate_size": 4304,
1780
+ "model_type": "qwen3_5_moe",
1781
+ "num_heads": 16,
1782
+ "num_position_embeddings": 2304,
1783
+ "out_hidden_size": 4096,
1784
+ "patch_size": 16,
1785
+ "spatial_merge_size": 2,
1786
+ "temporal_patch_size": 2
1787
+ },
1788
+ "vision_end_token_id": 248054,
1789
+ "vision_start_token_id": 248053
1790
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 248044,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 248046,
6
+ 248044
7
+ ],
8
+ "pad_token_id": 248044,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "5.2.0.dev0"
13
+ }
model-00001-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d944ceff89c5b2042e42f3e66e3f69659b92d2f0fee203f5fcfcd26089d2c46
3
+ size 5365497040
model-00002-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8977801d9fe53b5f5b5a6eaeb383ff8d136eaaa5da5980e29605696b7aa9c4fe
3
+ size 5365599616
model-00003-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a10d5067ac8cc1e0a0efab2ff2d94c00c5c4d6783bd6c1a3d533371851e9bd
3
+ size 5367720344
model-00004-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3bb27e8d25982d9bfaa359e12c8a2190475e05d36f03c3932cdf7e5b3e1bdd
3
+ size 5367735216
model-00005-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a23693952202ef73810c173b56b6fd1dc1d38877622cef7dc08a7c1e0a650b61
3
+ size 5365599880
model-00006-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eeb1dc1ffdfa6f1742d943da81330c65012ebb43d614437b9207b00e368b4c1
3
+ size 5365497104
model-00007-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a67c0697b7f7c26f04a570b5ac7100c9a39b3b02ddbaa529fc3791d2836908
3
+ size 5365603200
model-00008-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8bec9f64a77783347e3a0f34cceb0da25f3b51eea0383f79c2618efe491f8e
3
+ size 5367729992
model-00009-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:555e618cb1a4fb561e7776a7a33ec3ddbd44e3f8dcc1c54e1578404908284843
3
+ size 5367735256
model-00010-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c907d2aaed1af1987b9fdce179a55511ef3f2db4ed70967d7f1a433f53cc93d
3
+ size 5365605080
model-00011-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43042c370b68366326b20d06dc7549b6e710aeeb6b3ffc1d362c6222bd7118d1
3
+ size 5365501600
model-00012-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dea2b9859c09f437502d6509cd80f646ced0cc2ba8a6e61e23a5823cca3a6ebe
3
+ size 5365604848
model-00013-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fd177605ee8e2678b26840283d133a05865946cb15b55f7dc70e8391dcc4ee2
3
+ size 5367734848
model-00014-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82eb0b5bbaa318b2bee81e310f0fbc7f5a0a9866d194e4fb6b47027c4bda1eca
3
+ size 5367730400
model-00015-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3301a2e7cc7dd587059081394f43f97a57685922109479c7a2ff5022877bd346
3
+ size 5365605040
model-00016-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80f77b8fc30183c0f1362f522769158e67601571a1b6d138092f3502d9676890
3
+ size 5365501640
model-00017-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eca69c79f539cb56dd384d568b994dcee6f50faf9d11e684832877122e3aa564
3
+ size 5365604848
model-00018-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e1911e111d74a33aa3e0ae7e42f3a1f042b0d92d9b1b19af4101dece83aa098
3
+ size 5367739712
model-00019-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883e8a16df4dd0382372bde5db2437deb9019266232bb70a8b4c522d2fea35de
3
+ size 5367725536
model-00020-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a166e5497e8426e5e83bd8b2a0f46fd119127c06285599c5068375b02a099cf
3
+ size 5365605008
model-00021-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba53c94e68167c8be47722e74f85a467eaa973a348725e872b56d6575059d73e
3
+ size 5365501672
model-00022-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972f698fe22b7e266ebaef1a8fed8f0efa63839f6308791a118da40fb28eef4b
3
+ size 5365604848
model-00023-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f646e88b93784419b3c9fb0215c5899d42fe797e52b26d40159dee667c4ca4
3
+ size 5367744568
model-00024-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abb0dd746af8fa94ab0b9afea61f73d408b0d19e0b74580f9676bec31c7e4b9
3
+ size 5367720680
model-00025-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4990f080f99500e92402593c42159a78a10bdb8b1b72a398da62b46159a2dbd
3
+ size 5365604968
model-00026-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1050dcd3dedc7029dacbf844e87a181c5afd83fa18309d83415646b755506fd
3
+ size 5367685552
model-00027-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57067853385ead56408bdd4f1bc06fbb31d67a2e868e6044d6c98948917eb52a
3
+ size 5367779576
model-00028-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09d1103347f2f1fa45f9431f826f5a4a2566c9a9180b40bb82e0977304328a2
3
+ size 5367749840
model-00029-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6e7b97b4b73c47018eb1dec8c40ea029558a3ba8b321a5e37ddcac1145794ed
3
+ size 5367715408
model-00030-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b81f76175dc20a8f1e40411340d88db8dfb3a6085c14e307b001ac9db69ddf
3
+ size 5365604928
model-00031-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8df7e13d7be5b53af0986aac3378e5fd19673dcb4db3c334154691f0210c41a
3
+ size 5367690776
model-00032-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c99e8eea00cb6b0b41833bdb01308c2d008fc63bc3179cda859a6c26c5015497
3
+ size 5367774392
model-00033-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eac007b8e3db6ab7af09c4c4569187382325fb4112653f2e0d3c6c495603c3e
3
+ size 5367755104
model-00034-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15349bef8ebb1c1ca6b54ca9aa66de827fff1d7f9ced85fd708091c660a08cde
3
+ size 5367710144
model-00035-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b526974130894dec47d6c06c10791445719c58341219124dc651e01d71705a59
3
+ size 5365604888
model-00036-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce8737d93c6dabe161576e076f13e212194cef5b87ff72753345375330c2612
3
+ size 5367696008
model-00037-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a9fc56b924b355ed5a8b4d9adf701b635a5cd67de6a683970580e202eb2e2b
3
+ size 5367769200
model-00038-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0e35b8ca2c3de8d1506a5081e2e30370ba8e7bfbeb1abd636cfa69a68e4da1
3
+ size 5293138480
model-00039-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba8109aa58e767a9daa7ff723da5844a8bcbbb69f55f660e9982d7f3fd1d657
3
+ size 5366535232
model-00040-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df10a8586dcd1326c5a5d9c9e19123550a8488f358a80bacc9e9c8ee913eb11
3
+ size 5353027632
model-00041-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8fe02a3f59ecc352ee5ea064f34d837c8e20732088eb314af16e0a1abcf90ae
3
+ size 5367690384
model-00042-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa9ba6946cd0767a9e5113b85ec878fa02fdae13169d2f6c64e37f0a15d2d31
3
+ size 3689643872
model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61432fba053c3c2436c1224f68ff19bdfc78c0ebea6310747061b959d8d05ae1
3
+ size 20109911
processor_config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "Qwen2VLImageProcessorFast",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "merge_size": 2,
20
+ "patch_size": 16,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 16777216,
25
+ "shortest_edge": 65536
26
+ },
27
+ "temporal_patch_size": 2
28
+ },
29
+ "processor_class": "Qwen3VLProcessor",
30
+ "video_processor": {
31
+ "data_format": "channels_first",
32
+ "default_to_square": true,
33
+ "do_convert_rgb": true,
34
+ "do_normalize": true,
35
+ "do_rescale": true,
36
+ "do_resize": true,
37
+ "do_sample_frames": true,
38
+ "fps": 2,
39
+ "image_mean": [
40
+ 0.5,
41
+ 0.5,
42
+ 0.5
43
+ ],
44
+ "image_std": [
45
+ 0.5,
46
+ 0.5,
47
+ 0.5
48
+ ],
49
+ "max_frames": 768,
50
+ "merge_size": 2,
51
+ "min_frames": 4,
52
+ "patch_size": 16,
53
+ "resample": 3,
54
+ "rescale_factor": 0.00392156862745098,
55
+ "return_metadata": false,
56
+ "size": {
57
+ "longest_edge": 25165824,
58
+ "shortest_edge": 4096
59
+ },
60
+ "temporal_patch_size": 2,
61
+ "video_processor_type": "Qwen3VLVideoProcessor"
62
+ }
63
+ }
quantization_config.json ADDED
@@ -0,0 +1,1648 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "data_type": "int",
4
+ "group_size": 128,
5
+ "sym": true,
6
+ "iters": 0,
7
+ "autoround_version": "0.12.0",
8
+ "block_name_to_quantize": "model.language_model.layers",
9
+ "quant_method": "auto-round",
10
+ "packing_format": "auto_round:auto_gptq",
11
+ "extra_config": {
12
+ "model.language_model.layers.0.mlp.shared_expert_gate": {
13
+ "bits": 16,
14
+ "data_type": "fp"
15
+ },
16
+ "model.language_model.layers.1.mlp.shared_expert_gate": {
17
+ "bits": 16,
18
+ "data_type": "fp"
19
+ },
20
+ "model.language_model.layers.2.mlp.shared_expert_gate": {
21
+ "bits": 16,
22
+ "data_type": "fp"
23
+ },
24
+ "model.language_model.layers.3.mlp.shared_expert_gate": {
25
+ "bits": 16,
26
+ "data_type": "fp"
27
+ },
28
+ "model.language_model.layers.4.mlp.shared_expert_gate": {
29
+ "bits": 16,
30
+ "data_type": "fp"
31
+ },
32
+ "model.language_model.layers.5.mlp.shared_expert_gate": {
33
+ "bits": 16,
34
+ "data_type": "fp"
35
+ },
36
+ "model.language_model.layers.6.mlp.shared_expert_gate": {
37
+ "bits": 16,
38
+ "data_type": "fp"
39
+ },
40
+ "model.language_model.layers.7.mlp.shared_expert_gate": {
41
+ "bits": 16,
42
+ "data_type": "fp"
43
+ },
44
+ "model.language_model.layers.8.mlp.shared_expert_gate": {
45
+ "bits": 16,
46
+ "data_type": "fp"
47
+ },
48
+ "model.language_model.layers.9.mlp.shared_expert_gate": {
49
+ "bits": 16,
50
+ "data_type": "fp"
51
+ },
52
+ "model.language_model.layers.10.mlp.shared_expert_gate": {
53
+ "bits": 16,
54
+ "data_type": "fp"
55
+ },
56
+ "model.language_model.layers.11.mlp.shared_expert_gate": {
57
+ "bits": 16,
58
+ "data_type": "fp"
59
+ },
60
+ "model.language_model.layers.12.mlp.shared_expert_gate": {
61
+ "bits": 16,
62
+ "data_type": "fp"
63
+ },
64
+ "model.language_model.layers.13.mlp.shared_expert_gate": {
65
+ "bits": 16,
66
+ "data_type": "fp"
67
+ },
68
+ "model.language_model.layers.14.mlp.shared_expert_gate": {
69
+ "bits": 16,
70
+ "data_type": "fp"
71
+ },
72
+ "model.language_model.layers.15.mlp.shared_expert_gate": {
73
+ "bits": 16,
74
+ "data_type": "fp"
75
+ },
76
+ "model.language_model.layers.16.mlp.shared_expert_gate": {
77
+ "bits": 16,
78
+ "data_type": "fp"
79
+ },
80
+ "model.language_model.layers.17.mlp.shared_expert_gate": {
81
+ "bits": 16,
82
+ "data_type": "fp"
83
+ },
84
+ "model.language_model.layers.18.mlp.shared_expert_gate": {
85
+ "bits": 16,
86
+ "data_type": "fp"
87
+ },
88
+ "model.language_model.layers.19.mlp.shared_expert_gate": {
89
+ "bits": 16,
90
+ "data_type": "fp"
91
+ },
92
+ "model.language_model.layers.20.mlp.shared_expert_gate": {
93
+ "bits": 16,
94
+ "data_type": "fp"
95
+ },
96
+ "model.language_model.layers.21.mlp.shared_expert_gate": {
97
+ "bits": 16,
98
+ "data_type": "fp"
99
+ },
100
+ "model.language_model.layers.22.mlp.shared_expert_gate": {
101
+ "bits": 16,
102
+ "data_type": "fp"
103
+ },
104
+ "model.language_model.layers.23.mlp.shared_expert_gate": {
105
+ "bits": 16,
106
+ "data_type": "fp"
107
+ },
108
+ "model.language_model.layers.24.mlp.shared_expert_gate": {
109
+ "bits": 16,
110
+ "data_type": "fp"
111
+ },
112
+ "model.language_model.layers.25.mlp.shared_expert_gate": {
113
+ "bits": 16,
114
+ "data_type": "fp"
115
+ },
116
+ "model.language_model.layers.26.mlp.shared_expert_gate": {
117
+ "bits": 16,
118
+ "data_type": "fp"
119
+ },
120
+ "model.language_model.layers.27.mlp.shared_expert_gate": {
121
+ "bits": 16,
122
+ "data_type": "fp"
123
+ },
124
+ "model.language_model.layers.28.mlp.shared_expert_gate": {
125
+ "bits": 16,
126
+ "data_type": "fp"
127
+ },
128
+ "model.language_model.layers.29.mlp.shared_expert_gate": {
129
+ "bits": 16,
130
+ "data_type": "fp"
131
+ },
132
+ "model.language_model.layers.30.mlp.shared_expert_gate": {
133
+ "bits": 16,
134
+ "data_type": "fp"
135
+ },
136
+ "model.language_model.layers.31.mlp.shared_expert_gate": {
137
+ "bits": 16,
138
+ "data_type": "fp"
139
+ },
140
+ "model.language_model.layers.32.mlp.shared_expert_gate": {
141
+ "bits": 16,
142
+ "data_type": "fp"
143
+ },
144
+ "model.language_model.layers.33.mlp.shared_expert_gate": {
145
+ "bits": 16,
146
+ "data_type": "fp"
147
+ },
148
+ "model.language_model.layers.34.mlp.shared_expert_gate": {
149
+ "bits": 16,
150
+ "data_type": "fp"
151
+ },
152
+ "model.language_model.layers.35.mlp.shared_expert_gate": {
153
+ "bits": 16,
154
+ "data_type": "fp"
155
+ },
156
+ "model.language_model.layers.36.mlp.shared_expert_gate": {
157
+ "bits": 16,
158
+ "data_type": "fp"
159
+ },
160
+ "model.language_model.layers.37.mlp.shared_expert_gate": {
161
+ "bits": 16,
162
+ "data_type": "fp"
163
+ },
164
+ "model.language_model.layers.38.mlp.shared_expert_gate": {
165
+ "bits": 16,
166
+ "data_type": "fp"
167
+ },
168
+ "model.language_model.layers.39.mlp.shared_expert_gate": {
169
+ "bits": 16,
170
+ "data_type": "fp"
171
+ },
172
+ "model.language_model.layers.40.mlp.shared_expert_gate": {
173
+ "bits": 16,
174
+ "data_type": "fp"
175
+ },
176
+ "model.language_model.layers.41.mlp.shared_expert_gate": {
177
+ "bits": 16,
178
+ "data_type": "fp"
179
+ },
180
+ "model.language_model.layers.42.mlp.shared_expert_gate": {
181
+ "bits": 16,
182
+ "data_type": "fp"
183
+ },
184
+ "model.language_model.layers.43.mlp.shared_expert_gate": {
185
+ "bits": 16,
186
+ "data_type": "fp"
187
+ },
188
+ "model.language_model.layers.44.mlp.shared_expert_gate": {
189
+ "bits": 16,
190
+ "data_type": "fp"
191
+ },
192
+ "model.language_model.layers.45.mlp.shared_expert_gate": {
193
+ "bits": 16,
194
+ "data_type": "fp"
195
+ },
196
+ "model.language_model.layers.46.mlp.shared_expert_gate": {
197
+ "bits": 16,
198
+ "data_type": "fp"
199
+ },
200
+ "model.language_model.layers.47.mlp.shared_expert_gate": {
201
+ "bits": 16,
202
+ "data_type": "fp"
203
+ },
204
+ "model.language_model.layers.48.mlp.shared_expert_gate": {
205
+ "bits": 16,
206
+ "data_type": "fp"
207
+ },
208
+ "model.language_model.layers.49.mlp.shared_expert_gate": {
209
+ "bits": 16,
210
+ "data_type": "fp"
211
+ },
212
+ "model.language_model.layers.50.mlp.shared_expert_gate": {
213
+ "bits": 16,
214
+ "data_type": "fp"
215
+ },
216
+ "model.language_model.layers.51.mlp.shared_expert_gate": {
217
+ "bits": 16,
218
+ "data_type": "fp"
219
+ },
220
+ "model.language_model.layers.52.mlp.shared_expert_gate": {
221
+ "bits": 16,
222
+ "data_type": "fp"
223
+ },
224
+ "model.language_model.layers.53.mlp.shared_expert_gate": {
225
+ "bits": 16,
226
+ "data_type": "fp"
227
+ },
228
+ "model.language_model.layers.54.mlp.shared_expert_gate": {
229
+ "bits": 16,
230
+ "data_type": "fp"
231
+ },
232
+ "model.language_model.layers.55.mlp.shared_expert_gate": {
233
+ "bits": 16,
234
+ "data_type": "fp"
235
+ },
236
+ "model.language_model.layers.56.mlp.shared_expert_gate": {
237
+ "bits": 16,
238
+ "data_type": "fp"
239
+ },
240
+ "model.language_model.layers.57.mlp.shared_expert_gate": {
241
+ "bits": 16,
242
+ "data_type": "fp"
243
+ },
244
+ "model.language_model.layers.58.mlp.shared_expert_gate": {
245
+ "bits": 16,
246
+ "data_type": "fp"
247
+ },
248
+ "model.language_model.layers.59.mlp.shared_expert_gate": {
249
+ "bits": 16,
250
+ "data_type": "fp"
251
+ },
252
+ "model.language_model.layers.0.linear_attn.out_proj": {
253
+ "bits": 16
254
+ },
255
+ "model.language_model.layers.0.linear_attn.in_proj_qkv": {
256
+ "bits": 16
257
+ },
258
+ "model.language_model.layers.0.linear_attn.in_proj_z": {
259
+ "bits": 16
260
+ },
261
+ "model.language_model.layers.0.linear_attn.in_proj_b": {
262
+ "bits": 16
263
+ },
264
+ "model.language_model.layers.0.linear_attn.in_proj_a": {
265
+ "bits": 16
266
+ },
267
+ "model.language_model.layers.0.mlp.shared_expert.gate_proj": {
268
+ "bits": 16
269
+ },
270
+ "model.language_model.layers.0.mlp.shared_expert.up_proj": {
271
+ "bits": 16
272
+ },
273
+ "model.language_model.layers.0.mlp.shared_expert.down_proj": {
274
+ "bits": 16
275
+ },
276
+ "model.language_model.layers.1.linear_attn.out_proj": {
277
+ "bits": 16
278
+ },
279
+ "model.language_model.layers.1.linear_attn.in_proj_qkv": {
280
+ "bits": 16
281
+ },
282
+ "model.language_model.layers.1.linear_attn.in_proj_z": {
283
+ "bits": 16
284
+ },
285
+ "model.language_model.layers.1.linear_attn.in_proj_b": {
286
+ "bits": 16
287
+ },
288
+ "model.language_model.layers.1.linear_attn.in_proj_a": {
289
+ "bits": 16
290
+ },
291
+ "model.language_model.layers.1.mlp.shared_expert.gate_proj": {
292
+ "bits": 16
293
+ },
294
+ "model.language_model.layers.1.mlp.shared_expert.up_proj": {
295
+ "bits": 16
296
+ },
297
+ "model.language_model.layers.1.mlp.shared_expert.down_proj": {
298
+ "bits": 16
299
+ },
300
+ "model.language_model.layers.2.linear_attn.out_proj": {
301
+ "bits": 16
302
+ },
303
+ "model.language_model.layers.2.linear_attn.in_proj_qkv": {
304
+ "bits": 16
305
+ },
306
+ "model.language_model.layers.2.linear_attn.in_proj_z": {
307
+ "bits": 16
308
+ },
309
+ "model.language_model.layers.2.linear_attn.in_proj_b": {
310
+ "bits": 16
311
+ },
312
+ "model.language_model.layers.2.linear_attn.in_proj_a": {
313
+ "bits": 16
314
+ },
315
+ "model.language_model.layers.2.mlp.shared_expert.gate_proj": {
316
+ "bits": 16
317
+ },
318
+ "model.language_model.layers.2.mlp.shared_expert.up_proj": {
319
+ "bits": 16
320
+ },
321
+ "model.language_model.layers.2.mlp.shared_expert.down_proj": {
322
+ "bits": 16
323
+ },
324
+ "model.language_model.layers.3.self_attn.q_proj": {
325
+ "bits": 16
326
+ },
327
+ "model.language_model.layers.3.self_attn.k_proj": {
328
+ "bits": 16
329
+ },
330
+ "model.language_model.layers.3.self_attn.v_proj": {
331
+ "bits": 16
332
+ },
333
+ "model.language_model.layers.3.self_attn.o_proj": {
334
+ "bits": 16
335
+ },
336
+ "model.language_model.layers.3.mlp.shared_expert.gate_proj": {
337
+ "bits": 16
338
+ },
339
+ "model.language_model.layers.3.mlp.shared_expert.up_proj": {
340
+ "bits": 16
341
+ },
342
+ "model.language_model.layers.3.mlp.shared_expert.down_proj": {
343
+ "bits": 16
344
+ },
345
+ "model.language_model.layers.4.linear_attn.out_proj": {
346
+ "bits": 16
347
+ },
348
+ "model.language_model.layers.4.linear_attn.in_proj_qkv": {
349
+ "bits": 16
350
+ },
351
+ "model.language_model.layers.4.linear_attn.in_proj_z": {
352
+ "bits": 16
353
+ },
354
+ "model.language_model.layers.4.linear_attn.in_proj_b": {
355
+ "bits": 16
356
+ },
357
+ "model.language_model.layers.4.linear_attn.in_proj_a": {
358
+ "bits": 16
359
+ },
360
+ "model.language_model.layers.4.mlp.shared_expert.gate_proj": {
361
+ "bits": 16
362
+ },
363
+ "model.language_model.layers.4.mlp.shared_expert.up_proj": {
364
+ "bits": 16
365
+ },
366
+ "model.language_model.layers.4.mlp.shared_expert.down_proj": {
367
+ "bits": 16
368
+ },
369
+ "model.language_model.layers.5.linear_attn.out_proj": {
370
+ "bits": 16
371
+ },
372
+ "model.language_model.layers.5.linear_attn.in_proj_qkv": {
373
+ "bits": 16
374
+ },
375
+ "model.language_model.layers.5.linear_attn.in_proj_z": {
376
+ "bits": 16
377
+ },
378
+ "model.language_model.layers.5.linear_attn.in_proj_b": {
379
+ "bits": 16
380
+ },
381
+ "model.language_model.layers.5.linear_attn.in_proj_a": {
382
+ "bits": 16
383
+ },
384
+ "model.language_model.layers.5.mlp.shared_expert.gate_proj": {
385
+ "bits": 16
386
+ },
387
+ "model.language_model.layers.5.mlp.shared_expert.up_proj": {
388
+ "bits": 16
389
+ },
390
+ "model.language_model.layers.5.mlp.shared_expert.down_proj": {
391
+ "bits": 16
392
+ },
393
+ "model.language_model.layers.6.linear_attn.out_proj": {
394
+ "bits": 16
395
+ },
396
+ "model.language_model.layers.6.linear_attn.in_proj_qkv": {
397
+ "bits": 16
398
+ },
399
+ "model.language_model.layers.6.linear_attn.in_proj_z": {
400
+ "bits": 16
401
+ },
402
+ "model.language_model.layers.6.linear_attn.in_proj_b": {
403
+ "bits": 16
404
+ },
405
+ "model.language_model.layers.6.linear_attn.in_proj_a": {
406
+ "bits": 16
407
+ },
408
+ "model.language_model.layers.6.mlp.shared_expert.gate_proj": {
409
+ "bits": 16
410
+ },
411
+ "model.language_model.layers.6.mlp.shared_expert.up_proj": {
412
+ "bits": 16
413
+ },
414
+ "model.language_model.layers.6.mlp.shared_expert.down_proj": {
415
+ "bits": 16
416
+ },
417
+ "model.language_model.layers.7.self_attn.q_proj": {
418
+ "bits": 16
419
+ },
420
+ "model.language_model.layers.7.self_attn.k_proj": {
421
+ "bits": 16
422
+ },
423
+ "model.language_model.layers.7.self_attn.v_proj": {
424
+ "bits": 16
425
+ },
426
+ "model.language_model.layers.7.self_attn.o_proj": {
427
+ "bits": 16
428
+ },
429
+ "model.language_model.layers.7.mlp.shared_expert.gate_proj": {
430
+ "bits": 16
431
+ },
432
+ "model.language_model.layers.7.mlp.shared_expert.up_proj": {
433
+ "bits": 16
434
+ },
435
+ "model.language_model.layers.7.mlp.shared_expert.down_proj": {
436
+ "bits": 16
437
+ },
438
+ "model.language_model.layers.8.linear_attn.out_proj": {
439
+ "bits": 16
440
+ },
441
+ "model.language_model.layers.8.linear_attn.in_proj_qkv": {
442
+ "bits": 16
443
+ },
444
+ "model.language_model.layers.8.linear_attn.in_proj_z": {
445
+ "bits": 16
446
+ },
447
+ "model.language_model.layers.8.linear_attn.in_proj_b": {
448
+ "bits": 16
449
+ },
450
+ "model.language_model.layers.8.linear_attn.in_proj_a": {
451
+ "bits": 16
452
+ },
453
+ "model.language_model.layers.8.mlp.shared_expert.gate_proj": {
454
+ "bits": 16
455
+ },
456
+ "model.language_model.layers.8.mlp.shared_expert.up_proj": {
457
+ "bits": 16
458
+ },
459
+ "model.language_model.layers.8.mlp.shared_expert.down_proj": {
460
+ "bits": 16
461
+ },
462
+ "model.language_model.layers.9.linear_attn.out_proj": {
463
+ "bits": 16
464
+ },
465
+ "model.language_model.layers.9.linear_attn.in_proj_qkv": {
466
+ "bits": 16
467
+ },
468
+ "model.language_model.layers.9.linear_attn.in_proj_z": {
469
+ "bits": 16
470
+ },
471
+ "model.language_model.layers.9.linear_attn.in_proj_b": {
472
+ "bits": 16
473
+ },
474
+ "model.language_model.layers.9.linear_attn.in_proj_a": {
475
+ "bits": 16
476
+ },
477
+ "model.language_model.layers.9.mlp.shared_expert.gate_proj": {
478
+ "bits": 16
479
+ },
480
+ "model.language_model.layers.9.mlp.shared_expert.up_proj": {
481
+ "bits": 16
482
+ },
483
+ "model.language_model.layers.9.mlp.shared_expert.down_proj": {
484
+ "bits": 16
485
+ },
486
+ "model.language_model.layers.10.linear_attn.out_proj": {
487
+ "bits": 16
488
+ },
489
+ "model.language_model.layers.10.linear_attn.in_proj_qkv": {
490
+ "bits": 16
491
+ },
492
+ "model.language_model.layers.10.linear_attn.in_proj_z": {
493
+ "bits": 16
494
+ },
495
+ "model.language_model.layers.10.linear_attn.in_proj_b": {
496
+ "bits": 16
497
+ },
498
+ "model.language_model.layers.10.linear_attn.in_proj_a": {
499
+ "bits": 16
500
+ },
501
+ "model.language_model.layers.10.mlp.shared_expert.gate_proj": {
502
+ "bits": 16
503
+ },
504
+ "model.language_model.layers.10.mlp.shared_expert.up_proj": {
505
+ "bits": 16
506
+ },
507
+ "model.language_model.layers.10.mlp.shared_expert.down_proj": {
508
+ "bits": 16
509
+ },
510
+ "model.language_model.layers.11.self_attn.q_proj": {
511
+ "bits": 16
512
+ },
513
+ "model.language_model.layers.11.self_attn.k_proj": {
514
+ "bits": 16
515
+ },
516
+ "model.language_model.layers.11.self_attn.v_proj": {
517
+ "bits": 16
518
+ },
519
+ "model.language_model.layers.11.self_attn.o_proj": {
520
+ "bits": 16
521
+ },
522
+ "model.language_model.layers.11.mlp.shared_expert.gate_proj": {
523
+ "bits": 16
524
+ },
525
+ "model.language_model.layers.11.mlp.shared_expert.up_proj": {
526
+ "bits": 16
527
+ },
528
+ "model.language_model.layers.11.mlp.shared_expert.down_proj": {
529
+ "bits": 16
530
+ },
531
+ "model.language_model.layers.12.linear_attn.out_proj": {
532
+ "bits": 16
533
+ },
534
+ "model.language_model.layers.12.linear_attn.in_proj_qkv": {
535
+ "bits": 16
536
+ },
537
+ "model.language_model.layers.12.linear_attn.in_proj_z": {
538
+ "bits": 16
539
+ },
540
+ "model.language_model.layers.12.linear_attn.in_proj_b": {
541
+ "bits": 16
542
+ },
543
+ "model.language_model.layers.12.linear_attn.in_proj_a": {
544
+ "bits": 16
545
+ },
546
+ "model.language_model.layers.12.mlp.shared_expert.gate_proj": {
547
+ "bits": 16
548
+ },
549
+ "model.language_model.layers.12.mlp.shared_expert.up_proj": {
550
+ "bits": 16
551
+ },
552
+ "model.language_model.layers.12.mlp.shared_expert.down_proj": {
553
+ "bits": 16
554
+ },
555
+ "model.language_model.layers.13.linear_attn.out_proj": {
556
+ "bits": 16
557
+ },
558
+ "model.language_model.layers.13.linear_attn.in_proj_qkv": {
559
+ "bits": 16
560
+ },
561
+ "model.language_model.layers.13.linear_attn.in_proj_z": {
562
+ "bits": 16
563
+ },
564
+ "model.language_model.layers.13.linear_attn.in_proj_b": {
565
+ "bits": 16
566
+ },
567
+ "model.language_model.layers.13.linear_attn.in_proj_a": {
568
+ "bits": 16
569
+ },
570
+ "model.language_model.layers.13.mlp.shared_expert.gate_proj": {
571
+ "bits": 16
572
+ },
573
+ "model.language_model.layers.13.mlp.shared_expert.up_proj": {
574
+ "bits": 16
575
+ },
576
+ "model.language_model.layers.13.mlp.shared_expert.down_proj": {
577
+ "bits": 16
578
+ },
579
+ "model.language_model.layers.14.linear_attn.out_proj": {
580
+ "bits": 16
581
+ },
582
+ "model.language_model.layers.14.linear_attn.in_proj_qkv": {
583
+ "bits": 16
584
+ },
585
+ "model.language_model.layers.14.linear_attn.in_proj_z": {
586
+ "bits": 16
587
+ },
588
+ "model.language_model.layers.14.linear_attn.in_proj_b": {
589
+ "bits": 16
590
+ },
591
+ "model.language_model.layers.14.linear_attn.in_proj_a": {
592
+ "bits": 16
593
+ },
594
+ "model.language_model.layers.14.mlp.shared_expert.gate_proj": {
595
+ "bits": 16
596
+ },
597
+ "model.language_model.layers.14.mlp.shared_expert.up_proj": {
598
+ "bits": 16
599
+ },
600
+ "model.language_model.layers.14.mlp.shared_expert.down_proj": {
601
+ "bits": 16
602
+ },
603
+ "model.language_model.layers.15.self_attn.q_proj": {
604
+ "bits": 16
605
+ },
606
+ "model.language_model.layers.15.self_attn.k_proj": {
607
+ "bits": 16
608
+ },
609
+ "model.language_model.layers.15.self_attn.v_proj": {
610
+ "bits": 16
611
+ },
612
+ "model.language_model.layers.15.self_attn.o_proj": {
613
+ "bits": 16
614
+ },
615
+ "model.language_model.layers.15.mlp.shared_expert.gate_proj": {
616
+ "bits": 16
617
+ },
618
+ "model.language_model.layers.15.mlp.shared_expert.up_proj": {
619
+ "bits": 16
620
+ },
621
+ "model.language_model.layers.15.mlp.shared_expert.down_proj": {
622
+ "bits": 16
623
+ },
624
+ "model.language_model.layers.16.linear_attn.out_proj": {
625
+ "bits": 16
626
+ },
627
+ "model.language_model.layers.16.linear_attn.in_proj_qkv": {
628
+ "bits": 16
629
+ },
630
+ "model.language_model.layers.16.linear_attn.in_proj_z": {
631
+ "bits": 16
632
+ },
633
+ "model.language_model.layers.16.linear_attn.in_proj_b": {
634
+ "bits": 16
635
+ },
636
+ "model.language_model.layers.16.linear_attn.in_proj_a": {
637
+ "bits": 16
638
+ },
639
+ "model.language_model.layers.16.mlp.shared_expert.gate_proj": {
640
+ "bits": 16
641
+ },
642
+ "model.language_model.layers.16.mlp.shared_expert.up_proj": {
643
+ "bits": 16
644
+ },
645
+ "model.language_model.layers.16.mlp.shared_expert.down_proj": {
646
+ "bits": 16
647
+ },
648
+ "model.language_model.layers.17.linear_attn.out_proj": {
649
+ "bits": 16
650
+ },
651
+ "model.language_model.layers.17.linear_attn.in_proj_qkv": {
652
+ "bits": 16
653
+ },
654
+ "model.language_model.layers.17.linear_attn.in_proj_z": {
655
+ "bits": 16
656
+ },
657
+ "model.language_model.layers.17.linear_attn.in_proj_b": {
658
+ "bits": 16
659
+ },
660
+ "model.language_model.layers.17.linear_attn.in_proj_a": {
661
+ "bits": 16
662
+ },
663
+ "model.language_model.layers.17.mlp.shared_expert.gate_proj": {
664
+ "bits": 16
665
+ },
666
+ "model.language_model.layers.17.mlp.shared_expert.up_proj": {
667
+ "bits": 16
668
+ },
669
+ "model.language_model.layers.17.mlp.shared_expert.down_proj": {
670
+ "bits": 16
671
+ },
672
+ "model.language_model.layers.18.linear_attn.out_proj": {
673
+ "bits": 16
674
+ },
675
+ "model.language_model.layers.18.linear_attn.in_proj_qkv": {
676
+ "bits": 16
677
+ },
678
+ "model.language_model.layers.18.linear_attn.in_proj_z": {
679
+ "bits": 16
680
+ },
681
+ "model.language_model.layers.18.linear_attn.in_proj_b": {
682
+ "bits": 16
683
+ },
684
+ "model.language_model.layers.18.linear_attn.in_proj_a": {
685
+ "bits": 16
686
+ },
687
+ "model.language_model.layers.18.mlp.shared_expert.gate_proj": {
688
+ "bits": 16
689
+ },
690
+ "model.language_model.layers.18.mlp.shared_expert.up_proj": {
691
+ "bits": 16
692
+ },
693
+ "model.language_model.layers.18.mlp.shared_expert.down_proj": {
694
+ "bits": 16
695
+ },
696
+ "model.language_model.layers.19.self_attn.q_proj": {
697
+ "bits": 16
698
+ },
699
+ "model.language_model.layers.19.self_attn.k_proj": {
700
+ "bits": 16
701
+ },
702
+ "model.language_model.layers.19.self_attn.v_proj": {
703
+ "bits": 16
704
+ },
705
+ "model.language_model.layers.19.self_attn.o_proj": {
706
+ "bits": 16
707
+ },
708
+ "model.language_model.layers.19.mlp.shared_expert.gate_proj": {
709
+ "bits": 16
710
+ },
711
+ "model.language_model.layers.19.mlp.shared_expert.up_proj": {
712
+ "bits": 16
713
+ },
714
+ "model.language_model.layers.19.mlp.shared_expert.down_proj": {
715
+ "bits": 16
716
+ },
717
+ "model.language_model.layers.20.linear_attn.out_proj": {
718
+ "bits": 16
719
+ },
720
+ "model.language_model.layers.20.linear_attn.in_proj_qkv": {
721
+ "bits": 16
722
+ },
723
+ "model.language_model.layers.20.linear_attn.in_proj_z": {
724
+ "bits": 16
725
+ },
726
+ "model.language_model.layers.20.linear_attn.in_proj_b": {
727
+ "bits": 16
728
+ },
729
+ "model.language_model.layers.20.linear_attn.in_proj_a": {
730
+ "bits": 16
731
+ },
732
+ "model.language_model.layers.20.mlp.shared_expert.gate_proj": {
733
+ "bits": 16
734
+ },
735
+ "model.language_model.layers.20.mlp.shared_expert.up_proj": {
736
+ "bits": 16
737
+ },
738
+ "model.language_model.layers.20.mlp.shared_expert.down_proj": {
739
+ "bits": 16
740
+ },
741
+ "model.language_model.layers.21.linear_attn.out_proj": {
742
+ "bits": 16
743
+ },
744
+ "model.language_model.layers.21.linear_attn.in_proj_qkv": {
745
+ "bits": 16
746
+ },
747
+ "model.language_model.layers.21.linear_attn.in_proj_z": {
748
+ "bits": 16
749
+ },
750
+ "model.language_model.layers.21.linear_attn.in_proj_b": {
751
+ "bits": 16
752
+ },
753
+ "model.language_model.layers.21.linear_attn.in_proj_a": {
754
+ "bits": 16
755
+ },
756
+ "model.language_model.layers.21.mlp.shared_expert.gate_proj": {
757
+ "bits": 16
758
+ },
759
+ "model.language_model.layers.21.mlp.shared_expert.up_proj": {
760
+ "bits": 16
761
+ },
762
+ "model.language_model.layers.21.mlp.shared_expert.down_proj": {
763
+ "bits": 16
764
+ },
765
+ "model.language_model.layers.22.linear_attn.out_proj": {
766
+ "bits": 16
767
+ },
768
+ "model.language_model.layers.22.linear_attn.in_proj_qkv": {
769
+ "bits": 16
770
+ },
771
+ "model.language_model.layers.22.linear_attn.in_proj_z": {
772
+ "bits": 16
773
+ },
774
+ "model.language_model.layers.22.linear_attn.in_proj_b": {
775
+ "bits": 16
776
+ },
777
+ "model.language_model.layers.22.linear_attn.in_proj_a": {
778
+ "bits": 16
779
+ },
780
+ "model.language_model.layers.22.mlp.shared_expert.gate_proj": {
781
+ "bits": 16
782
+ },
783
+ "model.language_model.layers.22.mlp.shared_expert.up_proj": {
784
+ "bits": 16
785
+ },
786
+ "model.language_model.layers.22.mlp.shared_expert.down_proj": {
787
+ "bits": 16
788
+ },
789
+ "model.language_model.layers.23.self_attn.q_proj": {
790
+ "bits": 16
791
+ },
792
+ "model.language_model.layers.23.self_attn.k_proj": {
793
+ "bits": 16
794
+ },
795
+ "model.language_model.layers.23.self_attn.v_proj": {
796
+ "bits": 16
797
+ },
798
+ "model.language_model.layers.23.self_attn.o_proj": {
799
+ "bits": 16
800
+ },
801
+ "model.language_model.layers.23.mlp.shared_expert.gate_proj": {
802
+ "bits": 16
803
+ },
804
+ "model.language_model.layers.23.mlp.shared_expert.up_proj": {
805
+ "bits": 16
806
+ },
807
+ "model.language_model.layers.23.mlp.shared_expert.down_proj": {
808
+ "bits": 16
809
+ },
810
+ "model.language_model.layers.24.linear_attn.out_proj": {
811
+ "bits": 16
812
+ },
813
+ "model.language_model.layers.24.linear_attn.in_proj_qkv": {
814
+ "bits": 16
815
+ },
816
+ "model.language_model.layers.24.linear_attn.in_proj_z": {
817
+ "bits": 16
818
+ },
819
+ "model.language_model.layers.24.linear_attn.in_proj_b": {
820
+ "bits": 16
821
+ },
822
+ "model.language_model.layers.24.linear_attn.in_proj_a": {
823
+ "bits": 16
824
+ },
825
+ "model.language_model.layers.24.mlp.shared_expert.gate_proj": {
826
+ "bits": 16
827
+ },
828
+ "model.language_model.layers.24.mlp.shared_expert.up_proj": {
829
+ "bits": 16
830
+ },
831
+ "model.language_model.layers.24.mlp.shared_expert.down_proj": {
832
+ "bits": 16
833
+ },
834
+ "model.language_model.layers.25.linear_attn.out_proj": {
835
+ "bits": 16
836
+ },
837
+ "model.language_model.layers.25.linear_attn.in_proj_qkv": {
838
+ "bits": 16
839
+ },
840
+ "model.language_model.layers.25.linear_attn.in_proj_z": {
841
+ "bits": 16
842
+ },
843
+ "model.language_model.layers.25.linear_attn.in_proj_b": {
844
+ "bits": 16
845
+ },
846
+ "model.language_model.layers.25.linear_attn.in_proj_a": {
847
+ "bits": 16
848
+ },
849
+ "model.language_model.layers.25.mlp.shared_expert.gate_proj": {
850
+ "bits": 16
851
+ },
852
+ "model.language_model.layers.25.mlp.shared_expert.up_proj": {
853
+ "bits": 16
854
+ },
855
+ "model.language_model.layers.25.mlp.shared_expert.down_proj": {
856
+ "bits": 16
857
+ },
858
+ "model.language_model.layers.26.linear_attn.out_proj": {
859
+ "bits": 16
860
+ },
861
+ "model.language_model.layers.26.linear_attn.in_proj_qkv": {
862
+ "bits": 16
863
+ },
864
+ "model.language_model.layers.26.linear_attn.in_proj_z": {
865
+ "bits": 16
866
+ },
867
+ "model.language_model.layers.26.linear_attn.in_proj_b": {
868
+ "bits": 16
869
+ },
870
+ "model.language_model.layers.26.linear_attn.in_proj_a": {
871
+ "bits": 16
872
+ },
873
+ "model.language_model.layers.26.mlp.shared_expert.gate_proj": {
874
+ "bits": 16
875
+ },
876
+ "model.language_model.layers.26.mlp.shared_expert.up_proj": {
877
+ "bits": 16
878
+ },
879
+ "model.language_model.layers.26.mlp.shared_expert.down_proj": {
880
+ "bits": 16
881
+ },
882
+ "model.language_model.layers.27.self_attn.q_proj": {
883
+ "bits": 16
884
+ },
885
+ "model.language_model.layers.27.self_attn.k_proj": {
886
+ "bits": 16
887
+ },
888
+ "model.language_model.layers.27.self_attn.v_proj": {
889
+ "bits": 16
890
+ },
891
+ "model.language_model.layers.27.self_attn.o_proj": {
892
+ "bits": 16
893
+ },
894
+ "model.language_model.layers.27.mlp.shared_expert.gate_proj": {
895
+ "bits": 16
896
+ },
897
+ "model.language_model.layers.27.mlp.shared_expert.up_proj": {
898
+ "bits": 16
899
+ },
900
+ "model.language_model.layers.27.mlp.shared_expert.down_proj": {
901
+ "bits": 16
902
+ },
903
+ "model.language_model.layers.28.linear_attn.out_proj": {
904
+ "bits": 16
905
+ },
906
+ "model.language_model.layers.28.linear_attn.in_proj_qkv": {
907
+ "bits": 16
908
+ },
909
+ "model.language_model.layers.28.linear_attn.in_proj_z": {
910
+ "bits": 16
911
+ },
912
+ "model.language_model.layers.28.linear_attn.in_proj_b": {
913
+ "bits": 16
914
+ },
915
+ "model.language_model.layers.28.linear_attn.in_proj_a": {
916
+ "bits": 16
917
+ },
918
+ "model.language_model.layers.28.mlp.shared_expert.gate_proj": {
919
+ "bits": 16
920
+ },
921
+ "model.language_model.layers.28.mlp.shared_expert.up_proj": {
922
+ "bits": 16
923
+ },
924
+ "model.language_model.layers.28.mlp.shared_expert.down_proj": {
925
+ "bits": 16
926
+ },
927
+ "model.language_model.layers.29.linear_attn.out_proj": {
928
+ "bits": 16
929
+ },
930
+ "model.language_model.layers.29.linear_attn.in_proj_qkv": {
931
+ "bits": 16
932
+ },
933
+ "model.language_model.layers.29.linear_attn.in_proj_z": {
934
+ "bits": 16
935
+ },
936
+ "model.language_model.layers.29.linear_attn.in_proj_b": {
937
+ "bits": 16
938
+ },
939
+ "model.language_model.layers.29.linear_attn.in_proj_a": {
940
+ "bits": 16
941
+ },
942
+ "model.language_model.layers.29.mlp.shared_expert.gate_proj": {
943
+ "bits": 16
944
+ },
945
+ "model.language_model.layers.29.mlp.shared_expert.up_proj": {
946
+ "bits": 16
947
+ },
948
+ "model.language_model.layers.29.mlp.shared_expert.down_proj": {
949
+ "bits": 16
950
+ },
951
+ "model.language_model.layers.30.linear_attn.out_proj": {
952
+ "bits": 16
953
+ },
954
+ "model.language_model.layers.30.linear_attn.in_proj_qkv": {
955
+ "bits": 16
956
+ },
957
+ "model.language_model.layers.30.linear_attn.in_proj_z": {
958
+ "bits": 16
959
+ },
960
+ "model.language_model.layers.30.linear_attn.in_proj_b": {
961
+ "bits": 16
962
+ },
963
+ "model.language_model.layers.30.linear_attn.in_proj_a": {
964
+ "bits": 16
965
+ },
966
+ "model.language_model.layers.30.mlp.shared_expert.gate_proj": {
967
+ "bits": 16
968
+ },
969
+ "model.language_model.layers.30.mlp.shared_expert.up_proj": {
970
+ "bits": 16
971
+ },
972
+ "model.language_model.layers.30.mlp.shared_expert.down_proj": {
973
+ "bits": 16
974
+ },
975
+ "model.language_model.layers.31.self_attn.q_proj": {
976
+ "bits": 16
977
+ },
978
+ "model.language_model.layers.31.self_attn.k_proj": {
979
+ "bits": 16
980
+ },
981
+ "model.language_model.layers.31.self_attn.v_proj": {
982
+ "bits": 16
983
+ },
984
+ "model.language_model.layers.31.self_attn.o_proj": {
985
+ "bits": 16
986
+ },
987
+ "model.language_model.layers.31.mlp.shared_expert.gate_proj": {
988
+ "bits": 16
989
+ },
990
+ "model.language_model.layers.31.mlp.shared_expert.up_proj": {
991
+ "bits": 16
992
+ },
993
+ "model.language_model.layers.31.mlp.shared_expert.down_proj": {
994
+ "bits": 16
995
+ },
996
+ "model.language_model.layers.32.linear_attn.out_proj": {
997
+ "bits": 16
998
+ },
999
+ "model.language_model.layers.32.linear_attn.in_proj_qkv": {
1000
+ "bits": 16
1001
+ },
1002
+ "model.language_model.layers.32.linear_attn.in_proj_z": {
1003
+ "bits": 16
1004
+ },
1005
+ "model.language_model.layers.32.linear_attn.in_proj_b": {
1006
+ "bits": 16
1007
+ },
1008
+ "model.language_model.layers.32.linear_attn.in_proj_a": {
1009
+ "bits": 16
1010
+ },
1011
+ "model.language_model.layers.32.mlp.shared_expert.gate_proj": {
1012
+ "bits": 16
1013
+ },
1014
+ "model.language_model.layers.32.mlp.shared_expert.up_proj": {
1015
+ "bits": 16
1016
+ },
1017
+ "model.language_model.layers.32.mlp.shared_expert.down_proj": {
1018
+ "bits": 16
1019
+ },
1020
+ "model.language_model.layers.33.linear_attn.out_proj": {
1021
+ "bits": 16
1022
+ },
1023
+ "model.language_model.layers.33.linear_attn.in_proj_qkv": {
1024
+ "bits": 16
1025
+ },
1026
+ "model.language_model.layers.33.linear_attn.in_proj_z": {
1027
+ "bits": 16
1028
+ },
1029
+ "model.language_model.layers.33.linear_attn.in_proj_b": {
1030
+ "bits": 16
1031
+ },
1032
+ "model.language_model.layers.33.linear_attn.in_proj_a": {
1033
+ "bits": 16
1034
+ },
1035
+ "model.language_model.layers.33.mlp.shared_expert.gate_proj": {
1036
+ "bits": 16
1037
+ },
1038
+ "model.language_model.layers.33.mlp.shared_expert.up_proj": {
1039
+ "bits": 16
1040
+ },
1041
+ "model.language_model.layers.33.mlp.shared_expert.down_proj": {
1042
+ "bits": 16
1043
+ },
1044
+ "model.language_model.layers.34.linear_attn.out_proj": {
1045
+ "bits": 16
1046
+ },
1047
+ "model.language_model.layers.34.linear_attn.in_proj_qkv": {
1048
+ "bits": 16
1049
+ },
1050
+ "model.language_model.layers.34.linear_attn.in_proj_z": {
1051
+ "bits": 16
1052
+ },
1053
+ "model.language_model.layers.34.linear_attn.in_proj_b": {
1054
+ "bits": 16
1055
+ },
1056
+ "model.language_model.layers.34.linear_attn.in_proj_a": {
1057
+ "bits": 16
1058
+ },
1059
+ "model.language_model.layers.34.mlp.shared_expert.gate_proj": {
1060
+ "bits": 16
1061
+ },
1062
+ "model.language_model.layers.34.mlp.shared_expert.up_proj": {
1063
+ "bits": 16
1064
+ },
1065
+ "model.language_model.layers.34.mlp.shared_expert.down_proj": {
1066
+ "bits": 16
1067
+ },
1068
+ "model.language_model.layers.35.self_attn.q_proj": {
1069
+ "bits": 16
1070
+ },
1071
+ "model.language_model.layers.35.self_attn.k_proj": {
1072
+ "bits": 16
1073
+ },
1074
+ "model.language_model.layers.35.self_attn.v_proj": {
1075
+ "bits": 16
1076
+ },
1077
+ "model.language_model.layers.35.self_attn.o_proj": {
1078
+ "bits": 16
1079
+ },
1080
+ "model.language_model.layers.35.mlp.shared_expert.gate_proj": {
1081
+ "bits": 16
1082
+ },
1083
+ "model.language_model.layers.35.mlp.shared_expert.up_proj": {
1084
+ "bits": 16
1085
+ },
1086
+ "model.language_model.layers.35.mlp.shared_expert.down_proj": {
1087
+ "bits": 16
1088
+ },
1089
+ "model.language_model.layers.36.linear_attn.out_proj": {
1090
+ "bits": 16
1091
+ },
1092
+ "model.language_model.layers.36.linear_attn.in_proj_qkv": {
1093
+ "bits": 16
1094
+ },
1095
+ "model.language_model.layers.36.linear_attn.in_proj_z": {
1096
+ "bits": 16
1097
+ },
1098
+ "model.language_model.layers.36.linear_attn.in_proj_b": {
1099
+ "bits": 16
1100
+ },
1101
+ "model.language_model.layers.36.linear_attn.in_proj_a": {
1102
+ "bits": 16
1103
+ },
1104
+ "model.language_model.layers.36.mlp.shared_expert.gate_proj": {
1105
+ "bits": 16
1106
+ },
1107
+ "model.language_model.layers.36.mlp.shared_expert.up_proj": {
1108
+ "bits": 16
1109
+ },
1110
+ "model.language_model.layers.36.mlp.shared_expert.down_proj": {
1111
+ "bits": 16
1112
+ },
1113
+ "model.language_model.layers.37.linear_attn.out_proj": {
1114
+ "bits": 16
1115
+ },
1116
+ "model.language_model.layers.37.linear_attn.in_proj_qkv": {
1117
+ "bits": 16
1118
+ },
1119
+ "model.language_model.layers.37.linear_attn.in_proj_z": {
1120
+ "bits": 16
1121
+ },
1122
+ "model.language_model.layers.37.linear_attn.in_proj_b": {
1123
+ "bits": 16
1124
+ },
1125
+ "model.language_model.layers.37.linear_attn.in_proj_a": {
1126
+ "bits": 16
1127
+ },
1128
+ "model.language_model.layers.37.mlp.shared_expert.gate_proj": {
1129
+ "bits": 16
1130
+ },
1131
+ "model.language_model.layers.37.mlp.shared_expert.up_proj": {
1132
+ "bits": 16
1133
+ },
1134
+ "model.language_model.layers.37.mlp.shared_expert.down_proj": {
1135
+ "bits": 16
1136
+ },
1137
+ "model.language_model.layers.38.linear_attn.out_proj": {
1138
+ "bits": 16
1139
+ },
1140
+ "model.language_model.layers.38.linear_attn.in_proj_qkv": {
1141
+ "bits": 16
1142
+ },
1143
+ "model.language_model.layers.38.linear_attn.in_proj_z": {
1144
+ "bits": 16
1145
+ },
1146
+ "model.language_model.layers.38.linear_attn.in_proj_b": {
1147
+ "bits": 16
1148
+ },
1149
+ "model.language_model.layers.38.linear_attn.in_proj_a": {
1150
+ "bits": 16
1151
+ },
1152
+ "model.language_model.layers.38.mlp.shared_expert.gate_proj": {
1153
+ "bits": 16
1154
+ },
1155
+ "model.language_model.layers.38.mlp.shared_expert.up_proj": {
1156
+ "bits": 16
1157
+ },
1158
+ "model.language_model.layers.38.mlp.shared_expert.down_proj": {
1159
+ "bits": 16
1160
+ },
1161
+ "model.language_model.layers.39.self_attn.q_proj": {
1162
+ "bits": 16
1163
+ },
1164
+ "model.language_model.layers.39.self_attn.k_proj": {
1165
+ "bits": 16
1166
+ },
1167
+ "model.language_model.layers.39.self_attn.v_proj": {
1168
+ "bits": 16
1169
+ },
1170
+ "model.language_model.layers.39.self_attn.o_proj": {
1171
+ "bits": 16
1172
+ },
1173
+ "model.language_model.layers.39.mlp.shared_expert.gate_proj": {
1174
+ "bits": 16
1175
+ },
1176
+ "model.language_model.layers.39.mlp.shared_expert.up_proj": {
1177
+ "bits": 16
1178
+ },
1179
+ "model.language_model.layers.39.mlp.shared_expert.down_proj": {
1180
+ "bits": 16
1181
+ },
1182
+ "model.language_model.layers.40.linear_attn.out_proj": {
1183
+ "bits": 16
1184
+ },
1185
+ "model.language_model.layers.40.linear_attn.in_proj_qkv": {
1186
+ "bits": 16
1187
+ },
1188
+ "model.language_model.layers.40.linear_attn.in_proj_z": {
1189
+ "bits": 16
1190
+ },
1191
+ "model.language_model.layers.40.linear_attn.in_proj_b": {
1192
+ "bits": 16
1193
+ },
1194
+ "model.language_model.layers.40.linear_attn.in_proj_a": {
1195
+ "bits": 16
1196
+ },
1197
+ "model.language_model.layers.40.mlp.shared_expert.gate_proj": {
1198
+ "bits": 16
1199
+ },
1200
+ "model.language_model.layers.40.mlp.shared_expert.up_proj": {
1201
+ "bits": 16
1202
+ },
1203
+ "model.language_model.layers.40.mlp.shared_expert.down_proj": {
1204
+ "bits": 16
1205
+ },
1206
+ "model.language_model.layers.41.linear_attn.out_proj": {
1207
+ "bits": 16
1208
+ },
1209
+ "model.language_model.layers.41.linear_attn.in_proj_qkv": {
1210
+ "bits": 16
1211
+ },
1212
+ "model.language_model.layers.41.linear_attn.in_proj_z": {
1213
+ "bits": 16
1214
+ },
1215
+ "model.language_model.layers.41.linear_attn.in_proj_b": {
1216
+ "bits": 16
1217
+ },
1218
+ "model.language_model.layers.41.linear_attn.in_proj_a": {
1219
+ "bits": 16
1220
+ },
1221
+ "model.language_model.layers.41.mlp.shared_expert.gate_proj": {
1222
+ "bits": 16
1223
+ },
1224
+ "model.language_model.layers.41.mlp.shared_expert.up_proj": {
1225
+ "bits": 16
1226
+ },
1227
+ "model.language_model.layers.41.mlp.shared_expert.down_proj": {
1228
+ "bits": 16
1229
+ },
1230
+ "model.language_model.layers.42.linear_attn.out_proj": {
1231
+ "bits": 16
1232
+ },
1233
+ "model.language_model.layers.42.linear_attn.in_proj_qkv": {
1234
+ "bits": 16
1235
+ },
1236
+ "model.language_model.layers.42.linear_attn.in_proj_z": {
1237
+ "bits": 16
1238
+ },
1239
+ "model.language_model.layers.42.linear_attn.in_proj_b": {
1240
+ "bits": 16
1241
+ },
1242
+ "model.language_model.layers.42.linear_attn.in_proj_a": {
1243
+ "bits": 16
1244
+ },
1245
+ "model.language_model.layers.42.mlp.shared_expert.gate_proj": {
1246
+ "bits": 16
1247
+ },
1248
+ "model.language_model.layers.42.mlp.shared_expert.up_proj": {
1249
+ "bits": 16
1250
+ },
1251
+ "model.language_model.layers.42.mlp.shared_expert.down_proj": {
1252
+ "bits": 16
1253
+ },
1254
+ "model.language_model.layers.43.self_attn.q_proj": {
1255
+ "bits": 16
1256
+ },
1257
+ "model.language_model.layers.43.self_attn.k_proj": {
1258
+ "bits": 16
1259
+ },
1260
+ "model.language_model.layers.43.self_attn.v_proj": {
1261
+ "bits": 16
1262
+ },
1263
+ "model.language_model.layers.43.self_attn.o_proj": {
1264
+ "bits": 16
1265
+ },
1266
+ "model.language_model.layers.43.mlp.shared_expert.gate_proj": {
1267
+ "bits": 16
1268
+ },
1269
+ "model.language_model.layers.43.mlp.shared_expert.up_proj": {
1270
+ "bits": 16
1271
+ },
1272
+ "model.language_model.layers.43.mlp.shared_expert.down_proj": {
1273
+ "bits": 16
1274
+ },
1275
+ "model.language_model.layers.44.linear_attn.out_proj": {
1276
+ "bits": 16
1277
+ },
1278
+ "model.language_model.layers.44.linear_attn.in_proj_qkv": {
1279
+ "bits": 16
1280
+ },
1281
+ "model.language_model.layers.44.linear_attn.in_proj_z": {
1282
+ "bits": 16
1283
+ },
1284
+ "model.language_model.layers.44.linear_attn.in_proj_b": {
1285
+ "bits": 16
1286
+ },
1287
+ "model.language_model.layers.44.linear_attn.in_proj_a": {
1288
+ "bits": 16
1289
+ },
1290
+ "model.language_model.layers.44.mlp.shared_expert.gate_proj": {
1291
+ "bits": 16
1292
+ },
1293
+ "model.language_model.layers.44.mlp.shared_expert.up_proj": {
1294
+ "bits": 16
1295
+ },
1296
+ "model.language_model.layers.44.mlp.shared_expert.down_proj": {
1297
+ "bits": 16
1298
+ },
1299
+ "model.language_model.layers.45.linear_attn.out_proj": {
1300
+ "bits": 16
1301
+ },
1302
+ "model.language_model.layers.45.linear_attn.in_proj_qkv": {
1303
+ "bits": 16
1304
+ },
1305
+ "model.language_model.layers.45.linear_attn.in_proj_z": {
1306
+ "bits": 16
1307
+ },
1308
+ "model.language_model.layers.45.linear_attn.in_proj_b": {
1309
+ "bits": 16
1310
+ },
1311
+ "model.language_model.layers.45.linear_attn.in_proj_a": {
1312
+ "bits": 16
1313
+ },
1314
+ "model.language_model.layers.45.mlp.shared_expert.gate_proj": {
1315
+ "bits": 16
1316
+ },
1317
+ "model.language_model.layers.45.mlp.shared_expert.up_proj": {
1318
+ "bits": 16
1319
+ },
1320
+ "model.language_model.layers.45.mlp.shared_expert.down_proj": {
1321
+ "bits": 16
1322
+ },
1323
+ "model.language_model.layers.46.linear_attn.out_proj": {
1324
+ "bits": 16
1325
+ },
1326
+ "model.language_model.layers.46.linear_attn.in_proj_qkv": {
1327
+ "bits": 16
1328
+ },
1329
+ "model.language_model.layers.46.linear_attn.in_proj_z": {
1330
+ "bits": 16
1331
+ },
1332
+ "model.language_model.layers.46.linear_attn.in_proj_b": {
1333
+ "bits": 16
1334
+ },
1335
+ "model.language_model.layers.46.linear_attn.in_proj_a": {
1336
+ "bits": 16
1337
+ },
1338
+ "model.language_model.layers.46.mlp.shared_expert.gate_proj": {
1339
+ "bits": 16
1340
+ },
1341
+ "model.language_model.layers.46.mlp.shared_expert.up_proj": {
1342
+ "bits": 16
1343
+ },
1344
+ "model.language_model.layers.46.mlp.shared_expert.down_proj": {
1345
+ "bits": 16
1346
+ },
1347
+ "model.language_model.layers.47.self_attn.q_proj": {
1348
+ "bits": 16
1349
+ },
1350
+ "model.language_model.layers.47.self_attn.k_proj": {
1351
+ "bits": 16
1352
+ },
1353
+ "model.language_model.layers.47.self_attn.v_proj": {
1354
+ "bits": 16
1355
+ },
1356
+ "model.language_model.layers.47.self_attn.o_proj": {
1357
+ "bits": 16
1358
+ },
1359
+ "model.language_model.layers.47.mlp.shared_expert.gate_proj": {
1360
+ "bits": 16
1361
+ },
1362
+ "model.language_model.layers.47.mlp.shared_expert.up_proj": {
1363
+ "bits": 16
1364
+ },
1365
+ "model.language_model.layers.47.mlp.shared_expert.down_proj": {
1366
+ "bits": 16
1367
+ },
1368
+ "model.language_model.layers.48.linear_attn.out_proj": {
1369
+ "bits": 16
1370
+ },
1371
+ "model.language_model.layers.48.linear_attn.in_proj_qkv": {
1372
+ "bits": 16
1373
+ },
1374
+ "model.language_model.layers.48.linear_attn.in_proj_z": {
1375
+ "bits": 16
1376
+ },
1377
+ "model.language_model.layers.48.linear_attn.in_proj_b": {
1378
+ "bits": 16
1379
+ },
1380
+ "model.language_model.layers.48.linear_attn.in_proj_a": {
1381
+ "bits": 16
1382
+ },
1383
+ "model.language_model.layers.48.mlp.shared_expert.gate_proj": {
1384
+ "bits": 16
1385
+ },
1386
+ "model.language_model.layers.48.mlp.shared_expert.up_proj": {
1387
+ "bits": 16
1388
+ },
1389
+ "model.language_model.layers.48.mlp.shared_expert.down_proj": {
1390
+ "bits": 16
1391
+ },
1392
+ "model.language_model.layers.49.linear_attn.out_proj": {
1393
+ "bits": 16
1394
+ },
1395
+ "model.language_model.layers.49.linear_attn.in_proj_qkv": {
1396
+ "bits": 16
1397
+ },
1398
+ "model.language_model.layers.49.linear_attn.in_proj_z": {
1399
+ "bits": 16
1400
+ },
1401
+ "model.language_model.layers.49.linear_attn.in_proj_b": {
1402
+ "bits": 16
1403
+ },
1404
+ "model.language_model.layers.49.linear_attn.in_proj_a": {
1405
+ "bits": 16
1406
+ },
1407
+ "model.language_model.layers.49.mlp.shared_expert.gate_proj": {
1408
+ "bits": 16
1409
+ },
1410
+ "model.language_model.layers.49.mlp.shared_expert.up_proj": {
1411
+ "bits": 16
1412
+ },
1413
+ "model.language_model.layers.49.mlp.shared_expert.down_proj": {
1414
+ "bits": 16
1415
+ },
1416
+ "model.language_model.layers.50.linear_attn.out_proj": {
1417
+ "bits": 16
1418
+ },
1419
+ "model.language_model.layers.50.linear_attn.in_proj_qkv": {
1420
+ "bits": 16
1421
+ },
1422
+ "model.language_model.layers.50.linear_attn.in_proj_z": {
1423
+ "bits": 16
1424
+ },
1425
+ "model.language_model.layers.50.linear_attn.in_proj_b": {
1426
+ "bits": 16
1427
+ },
1428
+ "model.language_model.layers.50.linear_attn.in_proj_a": {
1429
+ "bits": 16
1430
+ },
1431
+ "model.language_model.layers.50.mlp.shared_expert.gate_proj": {
1432
+ "bits": 16
1433
+ },
1434
+ "model.language_model.layers.50.mlp.shared_expert.up_proj": {
1435
+ "bits": 16
1436
+ },
1437
+ "model.language_model.layers.50.mlp.shared_expert.down_proj": {
1438
+ "bits": 16
1439
+ },
1440
+ "model.language_model.layers.51.self_attn.q_proj": {
1441
+ "bits": 16
1442
+ },
1443
+ "model.language_model.layers.51.self_attn.k_proj": {
1444
+ "bits": 16
1445
+ },
1446
+ "model.language_model.layers.51.self_attn.v_proj": {
1447
+ "bits": 16
1448
+ },
1449
+ "model.language_model.layers.51.self_attn.o_proj": {
1450
+ "bits": 16
1451
+ },
1452
+ "model.language_model.layers.51.mlp.shared_expert.gate_proj": {
1453
+ "bits": 16
1454
+ },
1455
+ "model.language_model.layers.51.mlp.shared_expert.up_proj": {
1456
+ "bits": 16
1457
+ },
1458
+ "model.language_model.layers.51.mlp.shared_expert.down_proj": {
1459
+ "bits": 16
1460
+ },
1461
+ "model.language_model.layers.52.linear_attn.out_proj": {
1462
+ "bits": 16
1463
+ },
1464
+ "model.language_model.layers.52.linear_attn.in_proj_qkv": {
1465
+ "bits": 16
1466
+ },
1467
+ "model.language_model.layers.52.linear_attn.in_proj_z": {
1468
+ "bits": 16
1469
+ },
1470
+ "model.language_model.layers.52.linear_attn.in_proj_b": {
1471
+ "bits": 16
1472
+ },
1473
+ "model.language_model.layers.52.linear_attn.in_proj_a": {
1474
+ "bits": 16
1475
+ },
1476
+ "model.language_model.layers.52.mlp.shared_expert.gate_proj": {
1477
+ "bits": 16
1478
+ },
1479
+ "model.language_model.layers.52.mlp.shared_expert.up_proj": {
1480
+ "bits": 16
1481
+ },
1482
+ "model.language_model.layers.52.mlp.shared_expert.down_proj": {
1483
+ "bits": 16
1484
+ },
1485
+ "model.language_model.layers.53.linear_attn.out_proj": {
1486
+ "bits": 16
1487
+ },
1488
+ "model.language_model.layers.53.linear_attn.in_proj_qkv": {
1489
+ "bits": 16
1490
+ },
1491
+ "model.language_model.layers.53.linear_attn.in_proj_z": {
1492
+ "bits": 16
1493
+ },
1494
+ "model.language_model.layers.53.linear_attn.in_proj_b": {
1495
+ "bits": 16
1496
+ },
1497
+ "model.language_model.layers.53.linear_attn.in_proj_a": {
1498
+ "bits": 16
1499
+ },
1500
+ "model.language_model.layers.53.mlp.shared_expert.gate_proj": {
1501
+ "bits": 16
1502
+ },
1503
+ "model.language_model.layers.53.mlp.shared_expert.up_proj": {
1504
+ "bits": 16
1505
+ },
1506
+ "model.language_model.layers.53.mlp.shared_expert.down_proj": {
1507
+ "bits": 16
1508
+ },
1509
+ "model.language_model.layers.54.linear_attn.out_proj": {
1510
+ "bits": 16
1511
+ },
1512
+ "model.language_model.layers.54.linear_attn.in_proj_qkv": {
1513
+ "bits": 16
1514
+ },
1515
+ "model.language_model.layers.54.linear_attn.in_proj_z": {
1516
+ "bits": 16
1517
+ },
1518
+ "model.language_model.layers.54.linear_attn.in_proj_b": {
1519
+ "bits": 16
1520
+ },
1521
+ "model.language_model.layers.54.linear_attn.in_proj_a": {
1522
+ "bits": 16
1523
+ },
1524
+ "model.language_model.layers.54.mlp.shared_expert.gate_proj": {
1525
+ "bits": 16
1526
+ },
1527
+ "model.language_model.layers.54.mlp.shared_expert.up_proj": {
1528
+ "bits": 16
1529
+ },
1530
+ "model.language_model.layers.54.mlp.shared_expert.down_proj": {
1531
+ "bits": 16
1532
+ },
1533
+ "model.language_model.layers.55.self_attn.q_proj": {
1534
+ "bits": 16
1535
+ },
1536
+ "model.language_model.layers.55.self_attn.k_proj": {
1537
+ "bits": 16
1538
+ },
1539
+ "model.language_model.layers.55.self_attn.v_proj": {
1540
+ "bits": 16
1541
+ },
1542
+ "model.language_model.layers.55.self_attn.o_proj": {
1543
+ "bits": 16
1544
+ },
1545
+ "model.language_model.layers.55.mlp.shared_expert.gate_proj": {
1546
+ "bits": 16
1547
+ },
1548
+ "model.language_model.layers.55.mlp.shared_expert.up_proj": {
1549
+ "bits": 16
1550
+ },
1551
+ "model.language_model.layers.55.mlp.shared_expert.down_proj": {
1552
+ "bits": 16
1553
+ },
1554
+ "model.language_model.layers.56.linear_attn.out_proj": {
1555
+ "bits": 16
1556
+ },
1557
+ "model.language_model.layers.56.linear_attn.in_proj_qkv": {
1558
+ "bits": 16
1559
+ },
1560
+ "model.language_model.layers.56.linear_attn.in_proj_z": {
1561
+ "bits": 16
1562
+ },
1563
+ "model.language_model.layers.56.linear_attn.in_proj_b": {
1564
+ "bits": 16
1565
+ },
1566
+ "model.language_model.layers.56.linear_attn.in_proj_a": {
1567
+ "bits": 16
1568
+ },
1569
+ "model.language_model.layers.56.mlp.shared_expert.gate_proj": {
1570
+ "bits": 16
1571
+ },
1572
+ "model.language_model.layers.56.mlp.shared_expert.up_proj": {
1573
+ "bits": 16
1574
+ },
1575
+ "model.language_model.layers.56.mlp.shared_expert.down_proj": {
1576
+ "bits": 16
1577
+ },
1578
+ "model.language_model.layers.57.linear_attn.out_proj": {
1579
+ "bits": 16
1580
+ },
1581
+ "model.language_model.layers.57.linear_attn.in_proj_qkv": {
1582
+ "bits": 16
1583
+ },
1584
+ "model.language_model.layers.57.linear_attn.in_proj_z": {
1585
+ "bits": 16
1586
+ },
1587
+ "model.language_model.layers.57.linear_attn.in_proj_b": {
1588
+ "bits": 16
1589
+ },
1590
+ "model.language_model.layers.57.linear_attn.in_proj_a": {
1591
+ "bits": 16
1592
+ },
1593
+ "model.language_model.layers.57.mlp.shared_expert.gate_proj": {
1594
+ "bits": 16
1595
+ },
1596
+ "model.language_model.layers.57.mlp.shared_expert.up_proj": {
1597
+ "bits": 16
1598
+ },
1599
+ "model.language_model.layers.57.mlp.shared_expert.down_proj": {
1600
+ "bits": 16
1601
+ },
1602
+ "model.language_model.layers.58.linear_attn.out_proj": {
1603
+ "bits": 16
1604
+ },
1605
+ "model.language_model.layers.58.linear_attn.in_proj_qkv": {
1606
+ "bits": 16
1607
+ },
1608
+ "model.language_model.layers.58.linear_attn.in_proj_z": {
1609
+ "bits": 16
1610
+ },
1611
+ "model.language_model.layers.58.linear_attn.in_proj_b": {
1612
+ "bits": 16
1613
+ },
1614
+ "model.language_model.layers.58.linear_attn.in_proj_a": {
1615
+ "bits": 16
1616
+ },
1617
+ "model.language_model.layers.58.mlp.shared_expert.gate_proj": {
1618
+ "bits": 16
1619
+ },
1620
+ "model.language_model.layers.58.mlp.shared_expert.up_proj": {
1621
+ "bits": 16
1622
+ },
1623
+ "model.language_model.layers.58.mlp.shared_expert.down_proj": {
1624
+ "bits": 16
1625
+ },
1626
+ "model.language_model.layers.59.self_attn.q_proj": {
1627
+ "bits": 16
1628
+ },
1629
+ "model.language_model.layers.59.self_attn.k_proj": {
1630
+ "bits": 16
1631
+ },
1632
+ "model.language_model.layers.59.self_attn.v_proj": {
1633
+ "bits": 16
1634
+ },
1635
+ "model.language_model.layers.59.self_attn.o_proj": {
1636
+ "bits": 16
1637
+ },
1638
+ "model.language_model.layers.59.mlp.shared_expert.gate_proj": {
1639
+ "bits": 16
1640
+ },
1641
+ "model.language_model.layers.59.mlp.shared_expert.up_proj": {
1642
+ "bits": 16
1643
+ },
1644
+ "model.language_model.layers.59.mlp.shared_expert.down_proj": {
1645
+ "bits": 16
1646
+ }
1647
+ }
1648
+ }