Update README.md
README.md (changed):
@@ -151,7 +151,6 @@ quantized_model = AutoModelForCausalLM.from_pretrained(
     device_map=device,
     dtype=torch.bfloat16,
 )
-print("quantized model:", quantized_model)
 for i in range(12):
     if i == 3:
         assert isinstance(quantized_model.model.decoder.layers[i].self_attn.q_proj.weight, Int4TilePackedTo4dTensor)
@@ -170,4 +169,12 @@ input_ids = tokenizer(input_text, return_tensors="pt").to(device)
 output = quantized_model.generate(**input_ids, max_new_tokens=max_new_tokens)
 print(tokenizer.decode(output[0], skip_special_tokens=True))
 
+```
+
+Output:
+
+```
+What are we having for dinner?
+A nice dinner with a friend.
+I
 ```
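For context, here is a minimal, self-contained sketch of the README example these hunks touch. The checkpoint name and the `max_new_tokens` value are assumptions (the hunks only show fragments of the snippet), and the sketch presumes the checkpoint was already quantized and saved with torchao, so that `from_pretrained` restores the int4 tensor subclasses; treat it as an illustration of the flow, not the README's exact code.

```python
# Hedged sketch of the README flow modified above; assumptions are marked.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"
# Hypothetical id; the hunks never name the checkpoint. Assumed to be an
# OPT-style model (note model.decoder.layers in the assert) that was
# quantized with torchao and saved, so int4 subclasses are restored on load.
model_id = "<torchao-int4-opt-checkpoint>"

tokenizer = AutoTokenizer.from_pretrained(model_id)
quantized_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=device,
    dtype=torch.bfloat16,
)

input_text = "What are we having for dinner?"
input_ids = tokenizer(input_text, return_tensors="pt").to(device)

max_new_tokens = 16  # assumption; the hunks use the variable but never show its value
output = quantized_model.generate(**input_ids, max_new_tokens=max_new_tokens)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

The assert in the first hunk is the check that quantization actually took effect: decoder layer 3's `q_proj.weight` should have been swapped for torchao's `Int4TilePackedTo4dTensor` subclass rather than remaining a plain `torch.Tensor`.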