willopcbeta committed on
Commit
feedd7b
·
verified ·
1 Parent(s): 80733d8

Upload 65 files

Browse files
Files changed (2) hide show
  1. mlc-chat-config.json +90 -90
  2. ndarray-cache.json +0 -0
mlc-chat-config.json CHANGED
@@ -1,91 +1,91 @@
1
- {
2
- "version": "0.1.0",
3
- "model_type": "llama",
4
- "quantization": "q4f16_1",
5
- "model_config": {
6
- "hidden_size": 3072,
7
- "intermediate_size": 8192,
8
- "num_attention_heads": 24,
9
- "num_hidden_layers": 28,
10
- "rms_norm_eps": 1e-05,
11
- "vocab_size": 128256,
12
- "tie_word_embeddings": false,
13
- "position_embedding_base": 500000.0,
14
- "rope_scaling": {
15
- "factor": 32.0,
16
- "high_freq_factor": 4.0,
17
- "low_freq_factor": 1.0,
18
- "original_max_position_embeddings": 8192,
19
- "rope_type": "llama3"
20
- },
21
- "context_window_size": 131072,
22
- "prefill_chunk_size": 8192,
23
- "num_key_value_heads": 8,
24
- "head_dim": 128,
25
- "tensor_parallel_shards": 1,
26
- "pipeline_parallel_stages": 1,
27
- "max_batch_size": 128,
28
- "disaggregation": false
29
- },
30
- "vocab_size": 128256,
31
- "context_window_size": 131072,
32
- "sliding_window_size": -1,
33
- "prefill_chunk_size": 8192,
34
- "attention_sink_size": -1,
35
- "tensor_parallel_shards": 1,
36
- "pipeline_parallel_stages": 1,
37
- "temperature": 1.0,
38
- "presence_penalty": 0.0,
39
- "frequency_penalty": 0.0,
40
- "repetition_penalty": 1.0,
41
- "top_p": 1.0,
42
- "tokenizer_files": [
43
- "tokenizer.json",
44
- "tokenizer_config.json"
45
- ],
46
- "tokenizer_info": {
47
- "token_postproc_method": "byte_level",
48
- "prepend_space_in_encode": false,
49
- "strip_space_in_decode": false
50
- },
51
- "conv_template": {
52
- "name": "llama-3_1",
53
- "system_template": "<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>",
54
- "system_message": "You are a helpful, respectful and honest assistant.",
55
- "system_prefix_token_ids": [
56
- 128000
57
- ],
58
- "add_role_after_system_message": true,
59
- "roles": {
60
- "user": "<|start_header_id|>user",
61
- "assistant": "<|start_header_id|>assistant",
62
- "tool": "<|start_header_id|>ipython"
63
- },
64
- "role_templates": {
65
- "user": "{user_message}",
66
- "assistant": "{assistant_message}",
67
- "tool": "{tool_message}"
68
- },
69
- "messages": [],
70
- "seps": [
71
- "<|eot_id|>"
72
- ],
73
- "role_content_sep": "<|end_header_id|>\n\n",
74
- "role_empty_sep": "<|end_header_id|>\n\n",
75
- "stop_str": [],
76
- "stop_token_ids": [
77
- 128001,
78
- 128008,
79
- 128009
80
- ],
81
- "function_string": "",
82
- "use_function_calling": false
83
- },
84
- "pad_token_id": 0,
85
- "bos_token_id": 128000,
86
- "eos_token_id": [
87
- 128001,
88
- 128008,
89
- 128009
90
- ]
91
  }
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q4f16_1",
5
+ "model_config": {
6
+ "hidden_size": 3072,
7
+ "intermediate_size": 8192,
8
+ "num_attention_heads": 24,
9
+ "num_hidden_layers": 28,
10
+ "rms_norm_eps": 1e-05,
11
+ "vocab_size": 128256,
12
+ "tie_word_embeddings": false,
13
+ "position_embedding_base": 500000.0,
14
+ "rope_scaling": {
15
+ "factor": 32.0,
16
+ "high_freq_factor": 4.0,
17
+ "low_freq_factor": 1.0,
18
+ "original_max_position_embeddings": 8192,
19
+ "rope_type": "llama3"
20
+ },
21
+ "context_window_size": 131072,
22
+ "prefill_chunk_size": 8192,
23
+ "num_key_value_heads": 8,
24
+ "head_dim": 128,
25
+ "tensor_parallel_shards": 1,
26
+ "pipeline_parallel_stages": 1,
27
+ "max_batch_size": 128,
28
+ "disaggregation": false
29
+ },
30
+ "vocab_size": 128256,
31
+ "context_window_size": 131072,
32
+ "sliding_window_size": -1,
33
+ "prefill_chunk_size": 8192,
34
+ "attention_sink_size": -1,
35
+ "tensor_parallel_shards": 1,
36
+ "pipeline_parallel_stages": 1,
37
+ "temperature": 1.0,
38
+ "presence_penalty": 0.0,
39
+ "frequency_penalty": 0.0,
40
+ "repetition_penalty": 1.0,
41
+ "top_p": 1.0,
42
+ "tokenizer_files": [
43
+ "tokenizer.json",
44
+ "tokenizer_config.json"
45
+ ],
46
+ "tokenizer_info": {
47
+ "token_postproc_method": "byte_level",
48
+ "prepend_space_in_encode": false,
49
+ "strip_space_in_decode": false
50
+ },
51
+ "conv_template": {
52
+ "name": "llama-3_1",
53
+ "system_template": "<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>",
54
+ "system_message": "You are a helpful, respectful and honest assistant.",
55
+ "system_prefix_token_ids": [
56
+ 128000
57
+ ],
58
+ "add_role_after_system_message": true,
59
+ "roles": {
60
+ "user": "<|start_header_id|>user",
61
+ "assistant": "<|start_header_id|>assistant",
62
+ "tool": "<|start_header_id|>ipython"
63
+ },
64
+ "role_templates": {
65
+ "user": "{user_message}",
66
+ "assistant": "{assistant_message}",
67
+ "tool": "{tool_message}"
68
+ },
69
+ "messages": [],
70
+ "seps": [
71
+ "<|eot_id|>"
72
+ ],
73
+ "role_content_sep": "<|end_header_id|>\n\n",
74
+ "role_empty_sep": "<|end_header_id|>\n\n",
75
+ "stop_str": [],
76
+ "stop_token_ids": [
77
+ 128001,
78
+ 128008,
79
+ 128009
80
+ ],
81
+ "function_string": "",
82
+ "use_function_calling": false
83
+ },
84
+ "pad_token_id": 0,
85
+ "bos_token_id": 128000,
86
+ "eos_token_id": [
87
+ 128001,
88
+ 128008,
89
+ 128009
90
+ ]
91
  }
ndarray-cache.json ADDED
The diff for this file is too large to render. See raw diff