DeProgrammer committed on
Commit
b0e370f
·
verified ·
1 Parent(s): 3a7dc19

Upload folder using huggingface_hub

Browse files
Files changed (10) hide show
  1. .gitattributes +2 -0
  2. README.md +20 -3
  3. config.json +10 -0
  4. embeddings_bf16.bin +3 -0
  5. export_args.json +42 -0
  6. llm.mnn +3 -0
  7. llm.mnn.json +0 -0
  8. llm.mnn.weight +3 -0
  9. llm_config.json +11 -0
  10. tokenizer.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llm.mnn filter=lfs diff=lfs merge=lfs -text
37
+ llm.mnn.weight filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,20 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ base_model: shisa-ai/shisa-v2.1-unphi4-14b
6
+ base_model_relation: quantized
7
+ pipeline_tag: text-generation
8
+ library_name: mnn
9
+ tags:
10
+ - code
11
+ - mnn
12
+ ---
13
+
14
+ Warning: This model was not verified with the MNN Chat app; it wouldn't load on my 12 GB RAM phone.
15
+
16
+ This model [DeProgrammer/shisa-v2.1-unphi4-14b-MNN](https://huggingface.co/DeProgrammer/shisa-v2.1-unphi4-14b-MNN) was
17
+ converted to MNN format from [shisa-ai/shisa-v2.1-unphi4-14b](https://huggingface.co/shisa-ai/shisa-v2.1-unphi4-14b)
18
+ using [llmexport.py](https://github.com/alibaba/MNN/issues/4153#issuecomment-3866182869) in [MNN version **3.4.0**](https://github.com/alibaba/MNN/commit/a874b302f094599e2838a9186e5ce2cf6a81a7a7) with default settings (4-bit quantization).
19
+
20
+ Inference can be run via MNN, e.g., MNN Chat on Android.
config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llm_model": "llm.mnn",
3
+ "llm_weight": "llm.mnn.weight",
4
+ "backend_type": "cpu",
5
+ "thread_num": 4,
6
+ "precision": "low",
7
+ "memory": "low",
8
+ "sampler_type": "penalty",
9
+ "penalty": 1.1
10
+ }
embeddings_bf16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d761d26e7667c8bd09ba71a7c496c54c5309778de0e77b112ab28a0263603e
3
+ size 1027604480
export_args.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "shisa-ai/shisa-v2.1-unphi4-14b",
3
+ "type": null,
4
+ "tokenizer_path": "shisa-ai/shisa-v2.1-unphi4-14b",
5
+ "eagle_path": null,
6
+ "lora_path": null,
7
+ "gptq_path": null,
8
+ "dst_path": "./model",
9
+ "verbose": false,
10
+ "test": null,
11
+ "export": "mnn",
12
+ "onnx_slim": false,
13
+ "quant_bit": 4,
14
+ "quant_block": 64,
15
+ "visual_quant_bit": null,
16
+ "visual_quant_block": null,
17
+ "lm_quant_bit": 4,
18
+ "lm_quant_block": 64,
19
+ "mnnconvert": "../../../build/MNNConvert",
20
+ "ppl": false,
21
+ "awq": false,
22
+ "hqq": false,
23
+ "omni": false,
24
+ "transformer_fuse": false,
25
+ "group_conv_native": false,
26
+ "smooth": false,
27
+ "sym": false,
28
+ "visual_sym": false,
29
+ "seperate_embed": false,
30
+ "lora_split": false,
31
+ "calib_data": null,
32
+ "act_bit": 16,
33
+ "embed_bit": 16,
34
+ "act_sym": false,
35
+ "quant_config": null,
36
+ "generate_for_npu": false,
37
+ "skip_weight": false,
38
+ "omni_epochs": 20,
39
+ "omni_lr": 0.005,
40
+ "omni_wd": 0.0001,
41
+ "tie_word_embeddings": false
42
+ }
llm.mnn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c7945829b747825e3fc92f33dae0f588bfb75b7b66754eca5533a9ddf8338f
3
+ size 696896
llm.mnn.json ADDED
The diff for this file is too large to render. See raw diff
 
llm.mnn.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d4537c84c8771d2788ac84cf5fc005dcda45d519ebf7491f3783ca3034ea2f6
3
+ size 8844130346
llm_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "hidden_size": 5120,
4
+ "attention_mask": "float",
5
+ "attention_type": "full",
6
+ "jinja": {
7
+ "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}",
8
+ "bos": "<|endoftext|>",
9
+ "eos": "<|im_end|>"
10
+ }
11
+ }
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff