Update README.md
Browse files
README.md
CHANGED
|
@@ -135,7 +135,7 @@ model_to_quantize = "google/gemma-3-12b-it"
|
|
| 135 |
|
| 136 |
|
| 137 |
from torchao.quantization import Int4WeightOnlyConfig
|
| 138 |
-
quant_config = Int4WeightOnlyConfig(group_size=128
|
| 139 |
quantization_config = TorchAoConfig(quant_type=quant_config)
|
| 140 |
quantized_model = AutoModelForCausalLM.from_pretrained(model_to_quantize, device_map="cuda:0", torch_dtype=torch.bfloat16, quantization_config=quantization_config)
|
| 141 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
from torchao.quantization import Int4WeightOnlyConfig
|
| 138 |
+
quant_config = Int4WeightOnlyConfig(group_size=128)
|
| 139 |
quantization_config = TorchAoConfig(quant_type=quant_config)
|
| 140 |
quantized_model = AutoModelForCausalLM.from_pretrained(model_to_quantize, device_map="cuda:0", torch_dtype=torch.bfloat16, quantization_config=quantization_config)
|
| 141 |
tokenizer = AutoTokenizer.from_pretrained(model_to_quantize)
|