|
|
--- |
|
|
quantized_by: anikifoss |
|
|
pipeline_tag: text-generation |
|
|
base_model: Qwen/Qwen3-Coder-480B-A35B-Instruct |
|
|
license: apache-2.0 |
|
|
base_model_relation: quantized |
|
|
tags: |
|
|
- conversational |
|
|
- gguf |
|
|
- no_imatrix |
|
|
--- |
|
|
|
|
|
# Model Card |
|
|
|
|
|
High-quality quantization of **Qwen3-Coder-480B-A35B-Instruct** without using imatrix. |
|
|
|
|
|
# Run |
|
|
|
|
|
## ik_llama.cpp |
|
|
|
|
|
See [this detailed guide](https://github.com/ikawrakow/ik_llama.cpp/discussions/258) on how to set up ik_llama and how to make custom quants. |
|
|
|
|
|
``` |
|
|
./build/bin/llama-server \ |
|
|
--alias anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K \ |
|
|
--model /mnt/data/Models/anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K/Qwen3-Coder-480B-A35B-Instruct-HQ4_K-00001-of-00007.gguf \ |
|
|
--no-mmap -rtr \ |
|
|
--temp 0.5 --top-k 0 --top-p 1.0 --min-p 0.1 --repeat-penalty 1.0 \ |
|
|
--ctx-size 51000 \ |
|
|
-ctk f16 -ctv f16 \ |
|
|
-fa \ |
|
|
-b 1024 -ub 1024 \ |
|
|
-fmoe \ |
|
|
--n-gpu-layers 99 \ |
|
|
--override-tensor exps=CPU \ |
|
|
--parallel 1 \ |
|
|
--threads 32 \ |
|
|
--threads-batch 64 \ |
|
|
--host 127.0.0.1 \ |
|
|
--port 8090 |
|
|
``` |
|
|
|
|
|
## llama.cpp |
|
|
|
|
|
``` |
|
|
./build/bin/llama-server \ |
|
|
--alias anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K \ |
|
|
--model /mnt/data/Models/anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K/Qwen3-Coder-480B-A35B-Instruct-HQ4_K-00001-of-00007.gguf \ |
|
|
--no-mmap \ |
|
|
--temp 0.5 --top-k 0 --top-p 1.0 --min-p 0.1 --repeat-penalty 1.0 \ |
|
|
--ctx-size 51000 \ |
|
|
-ctk f16 -ctv f16 \ |
|
|
-fa \ |
|
|
-b 1024 -ub 1024 \ |
|
|
--n-gpu-layers 99 \ |
|
|
--override-tensor exps=CPU \ |
|
|
--parallel 1 \ |
|
|
--threads 32 \ |
|
|
--threads-batch 64 \ |
|
|
--host 127.0.0.1 \ |
|
|
--port 8090 |
|
|
``` |
|
|
|
|
|
## Quantization Recipe |
|
|
Quantized with [ik_llama](https://github.com/ikawrakow/ik_llama.cpp), but the resulting files should work with any GGUF-compatible inference framework. |
|
|
|
|
|
```bash |
|
|
#!/usr/bin/env bash |
|
|
|
|
|
custom=" |
|
|
# Token embedding and output tensors |
|
|
output\.weight=bf16 |
|
|
output_norm\.weight=f32 |
|
|
token_embd\.weight=bf16 |
|
|
|
|
|
blk\.[0-9]\.attn_k\.weight=q8_0 |
|
|
blk\.[0-9]\.attn_k_norm\.weight=f32 |
|
|
blk\.[0-9]\.attn_norm\.weight=f32 |
|
|
blk\.[0-9]\.attn_output\.weight=q8_0 |
|
|
blk\.[0-9]\.attn_q\.weight=q8_0 |
|
|
blk\.[0-9]\.attn_q_norm\.weight=f32 |
|
|
blk\.[0-9]\.attn_v\.weight=q8_0 |
|
|
blk\.[0-9]\.ffn_down_exps\.weight=q6_K |
|
|
blk\.[0-9]\.ffn_gate_exps\.weight=q4_K |
|
|
blk\.[0-9]\.ffn_up_exps\.weight=q4_K |
|
|
blk\.[0-9]\.ffn_gate_inp\.weight=f32 |
|
|
blk\.[0-9]\.ffn_norm\.weight=f32 |
|
|
blk\.[1-5][0-9]\.attn_k\.weight=q8_0 |
|
|
blk\.[1-5][0-9]\.attn_k_norm\.weight=f32 |
|
|
blk\.[1-5][0-9]\.attn_norm\.weight=f32 |
|
|
blk\.[1-5][0-9]\.attn_output\.weight=q8_0 |
|
|
blk\.[1-5][0-9]\.attn_q\.weight=q8_0 |
|
|
blk\.[1-5][0-9]\.attn_q_norm\.weight=f32 |
|
|
blk\.[1-5][0-9]\.attn_v\.weight=q8_0 |
|
|
blk\.[1-5][0-9]\.ffn_down_exps\.weight=q6_K |
|
|
blk\.[1-5][0-9]\.ffn_gate_exps\.weight=q4_K |
|
|
blk\.[1-5][0-9]\.ffn_up_exps\.weight=q4_K |
|
|
blk\.[1-5][0-9]\.ffn_gate_inp\.weight=f32 |
|
|
blk\.[1-5][0-9]\.ffn_norm\.weight=f32 |
|
|
blk\.6[0-1]\.attn_k\.weight=q8_0 |
|
|
blk\.6[0-1]\.attn_k_norm\.weight=f32 |
|
|
blk\.6[0-1]\.attn_norm\.weight=f32 |
|
|
blk\.6[0-1]\.attn_output\.weight=q8_0 |
|
|
blk\.6[0-1]\.attn_q\.weight=q8_0 |
|
|
blk\.6[0-1]\.attn_q_norm\.weight=f32 |
|
|
blk\.6[0-1]\.attn_v\.weight=q8_0 |
|
|
blk\.6[0-1]\.ffn_down_exps\.weight=q6_K |
|
|
blk\.6[0-1]\.ffn_gate_exps\.weight=q4_K |
|
|
blk\.6[0-1]\.ffn_up_exps\.weight=q4_K |
|
|
blk\.6[0-1]\.ffn_gate_inp\.weight=f32 |
|
|
blk\.6[0-1]\.ffn_norm\.weight=f32 |
|
|
" |
|
|
|
|
|
custom=$( |
|
|
echo "$custom" | grep -v '^#' | \ |
|
|
sed -Ez 's:\n+:,:g;s:,$::;s:^,::' |
|
|
) |
|
|
|
|
|
echo "Running with: --custom-q $custom" |
|
|
|
|
|
TARGET_MODEL="Qwen3-Coder-480B-A35B-Instruct-HQ4_K" |
|
|
mkdir -p ~/Env/models/anikifoss/$TARGET_MODEL |
|
|
./build/bin/llama-quantize \ |
|
|
--custom-q "$custom" \ |
|
|
/mnt/data/Models/Qwen/Qwen3-Coder-480B-A35B-Instruct-GGUF/Qwen3-Coder-480B-A35B-Instruct-BF16-00001-of-00021.gguf \ |
|
|
~/Env/models/anikifoss/$TARGET_MODEL/$TARGET_MODEL.gguf \ |
|
|
Q4_K \ |
|
|
32 |
|
|
``` |
|
|
|