Update README.md

8f83938 verified 12 days ago

3.75 kB

	---
	quantized_by: anikifoss
	pipeline_tag: text-generation
	base_model: Qwen/Qwen3-Coder-480B-A35B-Instruct
	license: apache-2.0
	base_model_relation: quantized
	tags:
	- conversational
	- gguf
	- no_imatrix
	---

	# Model Card

	High-quality quantization of Qwen3-Coder-480B-A35B-Instruct without using imatrix.

	# Run

	## ik_llama.cpp

	See [this detailed guide](https://github.com/ikawrakow/ik_llama.cpp/discussions/258) on how to set up ik_llama and how to make custom quants.

	```
	# Start ik_llama.cpp's llama-server for the HQ4_K quant, listening on 127.0.0.1:8090.
	# --model points at the first of the 7 GGUF shards; adjust the path to your download location.
	# NOTE(review): -rtr and -fmoe do not appear in the llama.cpp command in this README;
	# presumably they are ik_llama.cpp-specific options - confirm against its documentation.
	# NOTE(review): --n-gpu-layers 99 with --override-tensor exps=CPU presumably offloads all
	# layers to the GPU while keeping tensors whose names match "exps" on the CPU - confirm.
	./build/bin/llama-server \
	--alias anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K \
	--model /mnt/data/Models/anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K/Qwen3-Coder-480B-A35B-Instruct-HQ4_K-00001-of-00007.gguf \
	--no-mmap -rtr \
	--temp 0.5 --top-k 0 --top-p 1.0 --min-p 0.1 --repeat-penalty 1.0 \
	--ctx-size 51000 \
	-ctk f16 -ctv f16 \
	-fa \
	-b 1024 -ub 1024 \
	-fmoe \
	--n-gpu-layers 99 \
	--override-tensor exps=CPU \
	--parallel 1 \
	--threads 32 \
	--threads-batch 64 \
	--host 127.0.0.1 \
	--port 8090
	```

	## llama.cpp

	```
	# Start llama.cpp's llama-server for the HQ4_K quant, listening on 127.0.0.1:8090.
	# --model points at the first of the 7 GGUF shards; adjust the path to your download location.
	# NOTE(review): --n-gpu-layers 99 with --override-tensor exps=CPU presumably offloads all
	# layers to the GPU while keeping tensors whose names match "exps" on the CPU - confirm.
	./build/bin/llama-server \
	--alias anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K \
	--model /mnt/data/Models/anikifoss/Qwen3-Coder-480B-A35B-Instruct-HQ4_K/Qwen3-Coder-480B-A35B-Instruct-HQ4_K-00001-of-00007.gguf \
	--no-mmap \
	--temp 0.5 --top-k 0 --top-p 1.0 --min-p 0.1 --repeat-penalty 1.0 \
	--ctx-size 51000 \
	-ctk f16 -ctv f16 \
	-fa \
	-b 1024 -ub 1024 \
	--n-gpu-layers 99 \
	--override-tensor exps=CPU \
	--parallel 1 \
	--threads 32 \
	--threads-batch 64 \
	--host 127.0.0.1 \
	--port 8090
	```

	## Quantization Recipe
	Quantized with [ik_llama](https://github.com/ikawrakow/ik_llama.cpp), but the resulting files should work with any GGUF-compatible inference framework.

	```bash
	#!/usr/bin/env bash
	#
	# Quantize the BF16 GGUF of Qwen3-Coder-480B-A35B-Instruct into the HQ4_K mix
	# by passing per-tensor overrides to llama-quantize via --custom-q.

	# Per-tensor rules, one "<tensor-name regex>=<quant type>" per line.
	# Lines starting with '#' are comments and are stripped before use.
	# The three blk groups cover block indices 0-9, 10-59 and 60-61.
	custom="
	# Token embedding and output tensors
	output\.weight=bf16
	output_norm\.weight=f32
	token_embd\.weight=bf16

	blk\.[0-9]\.attn_k\.weight=q8_0
	blk\.[0-9]\.attn_k_norm\.weight=f32
	blk\.[0-9]\.attn_norm\.weight=f32
	blk\.[0-9]\.attn_output\.weight=q8_0
	blk\.[0-9]\.attn_q\.weight=q8_0
	blk\.[0-9]\.attn_q_norm\.weight=f32
	blk\.[0-9]\.attn_v\.weight=q8_0
	blk\.[0-9]\.ffn_down_exps\.weight=q6_K
	blk\.[0-9]\.ffn_gate_exps\.weight=q4_K
	blk\.[0-9]\.ffn_up_exps\.weight=q4_K
	blk\.[0-9]\.ffn_gate_inp\.weight=f32
	blk\.[0-9]\.ffn_norm\.weight=f32
	blk\.[1-5][0-9]\.attn_k\.weight=q8_0
	blk\.[1-5][0-9]\.attn_k_norm\.weight=f32
	blk\.[1-5][0-9]\.attn_norm\.weight=f32
	blk\.[1-5][0-9]\.attn_output\.weight=q8_0
	blk\.[1-5][0-9]\.attn_q\.weight=q8_0
	blk\.[1-5][0-9]\.attn_q_norm\.weight=f32
	blk\.[1-5][0-9]\.attn_v\.weight=q8_0
	blk\.[1-5][0-9]\.ffn_down_exps\.weight=q6_K
	blk\.[1-5][0-9]\.ffn_gate_exps\.weight=q4_K
	blk\.[1-5][0-9]\.ffn_up_exps\.weight=q4_K
	blk\.[1-5][0-9]\.ffn_gate_inp\.weight=f32
	blk\.[1-5][0-9]\.ffn_norm\.weight=f32
	blk\.6[0-1]\.attn_k\.weight=q8_0
	blk\.6[0-1]\.attn_k_norm\.weight=f32
	blk\.6[0-1]\.attn_norm\.weight=f32
	blk\.6[0-1]\.attn_output\.weight=q8_0
	blk\.6[0-1]\.attn_q\.weight=q8_0
	blk\.6[0-1]\.attn_q_norm\.weight=f32
	blk\.6[0-1]\.attn_v\.weight=q8_0
	blk\.6[0-1]\.ffn_down_exps\.weight=q6_K
	blk\.6[0-1]\.ffn_gate_exps\.weight=q4_K
	blk\.6[0-1]\.ffn_up_exps\.weight=q4_K
	blk\.6[0-1]\.ffn_gate_inp\.weight=f32
	blk\.6[0-1]\.ffn_norm\.weight=f32
	"

	# Flatten the rule list into the single comma-separated string passed to --custom-q:
	#   1. grep drops the comment lines (those starting with '#');
	#   2. sed -z treats the whole input as one record so newlines can be matched,
	#      replaces each run of newlines with one comma, then trims a trailing
	#      and a leading comma.
	# The pipes must be real '|' characters; an escaped '\|' would be handed to
	# echo as a literal argument and the filters would never run.
	custom=$(
	echo "$custom" | grep -v '^#' | \
	sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
	)

	echo "Running with: --custom-q $custom"

	TARGET_MODEL="Qwen3-Coder-480B-A35B-Instruct-HQ4_K"
	# Build the output directory once and keep every expansion quoted.
	OUTPUT_DIR="$HOME/Env/models/anikifoss/$TARGET_MODEL"
	mkdir -p "$OUTPUT_DIR"

	# Positional arguments: input GGUF (first of the 21 BF16 shards), output file,
	# then Q4_K and 32.
	# NOTE(review): presumably Q4_K is the type for tensors not matched by a rule
	# and 32 is the thread count - confirm against llama-quantize's usage text.
	./build/bin/llama-quantize \
	--custom-q "$custom" \
	/mnt/data/Models/Qwen/Qwen3-Coder-480B-A35B-Instruct-GGUF/Qwen3-Coder-480B-A35B-Instruct-BF16-00001-of-00021.gguf \
	"$OUTPUT_DIR/$TARGET_MODEL.gguf" \
	Q4_K \
	32
	```