jerryzh168 committed on
Commit
914db56
·
verified ·
1 Parent(s): d245c05

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +52 -0
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```
2
+ model: single_linear
3
+ config: Int4WeightOnlyConfig
4
+ config version: 2
5
+ torchao version: 0.13.dev
6
+ ```
7
+
8
+ ```
9
+ import torch
10
+ import io
11
+
12
+ model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))
13
+
14
+ from torchao.quantization import Int4WeightOnlyConfig, quantize_
15
+ quant_config = Int4WeightOnlyConfig(group_size=128, version=2)
16
+ quantize_(model, quant_config)
17
+ example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
18
+ output = model(*example_inputs)
19
+
20
+ # Push to hub
21
+ USER_ID = "torchao-testing"
22
+ MODEL_NAME = "single-linear"
23
+ save_to = f"{USER_ID}/{MODEL_NAME}-Int4WeightOnlyConfig-v2-0.13.dev"
24
+
25
+ from huggingface_hub import HfApi
26
+ api = HfApi()
27
+
28
+ buf = io.BytesIO()
29
+ torch.save(model.state_dict(), buf)
30
+ api.create_repo(save_to, repo_type="model", exist_ok=True)
31
+ api.upload_file(
32
+ path_or_fileobj=buf,
33
+ path_in_repo="model.pt",
34
+ repo_id=save_to,
35
+ )
36
+
37
+ buf = io.BytesIO()
38
+ torch.save(example_inputs, buf)
39
+ api.upload_file(
40
+ path_or_fileobj=buf,
41
+ path_in_repo="model_inputs.pt",
42
+ repo_id=save_to,
43
+ )
44
+
45
+ buf = io.BytesIO()
46
+ torch.save(output, buf)
47
+ api.upload_file(
48
+ path_or_fileobj=buf,
49
+ path_in_repo="model_output.pt",
50
+ repo_id=save_to,
51
+ )
52
+ ```