| ``` | |
| model: single_linear | |
| config: IntxWeightOnlyConfig | |
| config version: 1 | |
| torchao version: 0.14.dev | |
| ``` | |
| ``` | |
| import torch | |
| import io | |
| model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda")) | |
| from torchao.quantization import IntxWeightOnlyConfig, quantize_ | |
| from torchao.quantization.granularity import PerGroup | |
| version=1 | |
| quant_config = IntxWeightOnlyConfig( | |
| weight_dtype=torch.int4, | |
| granularity=PerGroup(32), | |
| version=version | |
| ) | |
| quantize_(model, quant_config) | |
| example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),) | |
| output = model(*example_inputs) | |
| # Push to hub | |
| USER_ID = "torchao-testing" | |
| MODEL_NAME = "single-linear" | |
| save_to = f"{USER_ID}/{MODEL_NAME}-IntxWeightOnlyConfig-v{version}-0.14.dev" | |
| from huggingface_hub import HfApi | |
| api = HfApi() | |
| buf = io.BytesIO() | |
| torch.save(model.state_dict(), buf) | |
| api.create_repo(save_to, repo_type="model", exist_ok=False) | |
| api.upload_file( | |
| path_or_fileobj=buf, | |
| path_in_repo="model.pt", | |
| repo_id=save_to, | |
| ) | |
| buf = io.BytesIO() | |
| torch.save(example_inputs, buf) | |
| api.upload_file( | |
| path_or_fileobj=buf, | |
| path_in_repo="model_inputs.pt", | |
| repo_id=save_to, | |
| ) | |
| buf = io.BytesIO() | |
| torch.save(output, buf) | |
| api.upload_file( | |
| path_or_fileobj=buf, | |
| path_in_repo="model_output.pt", | |
| repo_id=save_to, | |
| ) | |
| ``` | |