metascroy committed on
Commit
911f1d0
·
verified ·
1 Parent(s): 8a1b2f7

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +54 -0
README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```
2
+ import torch
3
+ import io
4
+
5
+ model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))
6
+
7
+ from torchao.quantization import IntxWeightOnlyConfig, quantize_
8
+ from torchao.quantization.granularity import PerGroup
9
+
10
+ version=2
11
+
12
+ quant_config = IntxWeightOnlyConfig(
13
+ weight_dtype=torch.int4,
14
+ granularity=PerGroup(32),
15
+ version=version
16
+ )
17
+ quantize_(model, quant_config)
18
+ example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
19
+ output = model(*example_inputs)
20
+
21
+ # Push to hub
22
+ USER_ID = "torchao-testing"
23
+ MODEL_NAME = "single-linear"
24
+ save_to = f"{USER_ID}/{MODEL_NAME}-IntxWeightOnlyConfig-v{version}-0.14.dev"
25
+
26
+ from huggingface_hub import HfApi
27
+ api = HfApi()
28
+
29
+ buf = io.BytesIO()
30
+ torch.save(model.state_dict(), buf)
31
+ api.create_repo(save_to, repo_type="model", exist_ok=False)
32
+ api.upload_file(
33
+ path_or_fileobj=buf,
34
+ path_in_repo="model.pt",
35
+ repo_id=save_to,
36
+ )
37
+
38
+ buf = io.BytesIO()
39
+ torch.save(example_inputs, buf)
40
+ api.upload_file(
41
+ path_or_fileobj=buf,
42
+ path_in_repo="model_inputs.pt",
43
+ repo_id=save_to,
44
+ )
45
+
46
+ buf = io.BytesIO()
47
+ torch.save(output, buf)
48
+ api.upload_file(
49
+ path_or_fileobj=buf,
50
+ path_in_repo="model_output.pt",
51
+ repo_id=save_to,
52
+ )
53
+
54
+ ```