| ``` | |
| model: single_linear | |
| config: Int4WeightOnlyConfig | |
| config version: 2 | |
| torchao version: 0.13.dev | |
| ``` | |
| ``` | |
| import torch | |
| import io | |
| model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda")) | |
| from torchao.quantization import Int4WeightOnlyConfig, quantize_ | |
| quant_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="plain", version=2) | |
| quantize_(model, quant_config) | |
| example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),) | |
| output = model(*example_inputs) | |
| # Push to hub | |
| USER_ID = "torchao-testing" | |
| MODEL_NAME = "single-linear" | |
| save_to = f"{USER_ID}/{MODEL_NAME}-Int4WeightOnlyConfig-v2-0.13.dev" | |
| from huggingface_hub import HfApi | |
| api = HfApi() | |
| buf = io.BytesIO() | |
| torch.save(model.state_dict(), buf) | |
| api.create_repo(save_to, repo_type="model", exist_ok=True) | |
| api.upload_file( | |
| path_or_fileobj=buf, | |
| path_in_repo="model.pt", | |
| repo_id=save_to, | |
| ) | |
| buf = io.BytesIO() | |
| torch.save(example_inputs, buf) | |
| api.upload_file( | |
| path_or_fileobj=buf, | |
| path_in_repo="model_inputs.pt", | |
| repo_id=save_to, | |
| ) | |
| buf = io.BytesIO() | |
| torch.save(output, buf) | |
| api.upload_file( | |
| path_or_fileobj=buf, | |
| path_in_repo="model_output.pt", | |
| repo_id=save_to, | |
| ) | |
| ``` |