metascroy committed on
Commit
12bbaa3
·
verified ·
1 Parent(s): eb50de4

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +51 -0
README.md ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Quantize a single-linear model with IntxWeightOnlyConfig and publish it.

The script builds a one-layer bfloat16 model on CUDA, applies int4
weight-only quantization with per-group granularity (group size 32), runs
one forward pass, and uploads three artifacts to a new Hugging Face Hub
model repo: the quantized state dict (``model.pt``), the example inputs
(``model_inputs.pt``) and the corresponding output (``model_output.pt``).
"""
import io

import torch
from huggingface_hub import HfApi
from torchao.quantization import IntxWeightOnlyConfig, quantize_
from torchao.quantization.granularity import PerGroup

# Config version passed to IntxWeightOnlyConfig; also embedded in the repo name.
version = 1

# Destination repo on the Hub.
USER_ID = "torchao-testing"
MODEL_NAME = "single-linear"
save_to = f"{USER_ID}/{MODEL_NAME}-IntxWeightOnlyConfig-v{version}-0.14.dev"


def _upload_torch_object(api, obj, path_in_repo, repo_id):
    """Serialize *obj* with ``torch.save`` and upload it as *path_in_repo*.

    Args:
        api: ``HfApi`` client used for the upload.
        obj: Any object ``torch.save`` can serialize.
        path_in_repo: Target file name inside the repo.
        repo_id: Hub repo that receives the file.
    """
    buf = io.BytesIO()
    torch.save(obj, buf)
    # torch.save leaves the cursor at end-of-stream; rewind so the upload
    # does not depend on the hub client rewinding the stream itself.
    buf.seek(0)
    api.upload_file(
        path_or_fileobj=buf,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
    )


model = torch.nn.Sequential(
    torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda")
)

quant_config = IntxWeightOnlyConfig(
    weight_dtype=torch.int4,
    granularity=PerGroup(32),
    version=version,
)
quantize_(model, quant_config)

# Record one input/output pair from the quantized model so it is published
# alongside the weights.
example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
output = model(*example_inputs)

# Push to hub
api = HfApi()
# NOTE(review): exist_ok=False presumably makes a re-run against an already
# existing repo fail instead of reusing it -- confirm this is intended.
api.create_repo(save_to, repo_type="model", exist_ok=False)

_upload_torch_object(api, model.state_dict(), "model.pt", save_to)
_upload_torch_object(api, example_inputs, "model_inputs.pt", save_to)
_upload_torch_object(api, output, "model_output.pt", save_to)