from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

MODEL_PATH = "DeepSeek-V3-1B-Test"
QUANT_PATH = "DeepSeek-V3-1B-Test-AWQ"

# 4-bit AWQ (GEMM kernel) with zero-point quantization and group size 128.
# The compressed KV projection of the MLA attention block
# (self_attn.kv_a_proj_with_mqa) is excluded from quantization.
QUANT_CONFIG = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
    "modules_to_not_convert": ["self_attn.kv_a_proj_with_mqa"],
}


def main():
    # Load the full-precision model; the KV cache is disabled since it is
    # not needed during calibration.
    model = AutoAWQForCausalLM.from_pretrained(
        MODEL_PATH, low_cpu_mem_usage=True, use_cache=False
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, legacy=True)

    # Run AWQ calibration and quantize the weights in place.
    model.quantize(
        tokenizer,
        quant_config=QUANT_CONFIG,
    )

    # Persist the quantized weights together with the tokenizer.
    model.save_quantized(QUANT_PATH)
    tokenizer.save_pretrained(QUANT_PATH)
    print(f'Model is quantized and saved at "{QUANT_PATH}".')


if __name__ == "__main__":
    main()
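
To sanity-check the result, the quantized checkpoint can be reloaded for inference. The sketch below is not part of the original script; it assumes AutoAWQ's standard `from_quantized` API, a CUDA device, and a hypothetical test prompt. `fuse_layers=False` is an assumption as well, since fused modules may not be available for every architecture.

# Hedged sketch: reload the quantized checkpoint and run a short generation.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

QUANT_PATH = "DeepSeek-V3-1B-Test-AWQ"

model = AutoAWQForCausalLM.from_quantized(QUANT_PATH, fuse_layers=False)
tokenizer = AutoTokenizer.from_pretrained(QUANT_PATH)

prompt = "Hello, my name is"  # hypothetical smoke-test prompt
tokens = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
output = model.generate(tokens, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))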