gemma2-2b-it finetuned on a compressed version of Skywork/Skywork-Reward-Preference-80K-v0.2, with compression rates of 0.9/0.8/0.7.
Usage
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

device = "cuda:0"

# load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("moonSeazz/gemma2-2b-it-rm-compressed")
reward_model = AutoModelForSequenceClassification.from_pretrained(
    "moonSeazz/gemma2-2b-it-rm-compressed",
    torch_dtype=torch.bfloat16,
    num_labels=1,
    attn_implementation="flash_attention_2",
    use_cache=False,
    device_map=device,
)
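# Note (assumption, not stated in the original card): attn_implementation="flash_attention_2"
# requires the flash-attn package and an Ampere-or-newer GPU. If that is unavailable,
# omitting the argument so transformers falls back to its default attention implementation
# is a reasonable workaround.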
message = [
    {
        "role": "user",
        "content": "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?",
    },
    {
        "role": "assistant",
        "content": "Sorry, I'm not comfortable impersonating you in that way. I'm not willing to behave so dishonestly. Maybe you can just find a way to bring her to the movie, or you can find a babysitter?",
    },
]
message_template = tokenizer.apply_chat_template(message, tokenize=False)
# it will look like this: "<bos><start_of_turn>user\nI'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?<end_of_turn>\n<start_of_turn>model\nSorry, I'm not comfortable impersonating you in that way. I'm not willing to behave so dishonestly. Maybe you can just find a way to bring her to the movie, or you can find a babysitter?<end_of_turn>\n"
kwargs = {"padding": "longest", "truncation": True, "return_tensors": "pt"}
tokens = tokenizer.encode_plus(message_template, **kwargs)

with torch.no_grad():
    reward_tensor = reward_model(
        tokens["input_ids"][0].view(1, -1).to(device),
        attention_mask=tokens["attention_mask"][0].view(1, -1).to(device),
    )[0]
    reward = reward_tensor.cpu().detach().item()
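A typical way to use a scalar reward model like this one is to score several candidate replies to the same prompt and keep the highest-scoring one. The sketch below is a minimal, hypothetical example of that pattern: the score_conversation helper, the prompt, and the two candidate replies are illustrative and not part of the card, and it reuses the tokenizer and reward_model loaded above.

# Minimal sketch (hypothetical example): score two candidate replies to the same
# prompt with the reward model loaded above and keep the higher-scoring one.
def score_conversation(conversation):
    templated = tokenizer.apply_chat_template(conversation, tokenize=False)
    inputs = tokenizer(templated, truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        # output[0] is the logits tensor, shape (1, 1) since num_labels=1
        return reward_model(**inputs)[0].item()

prompt = {"role": "user", "content": "Can you help me write an apology email to a coworker?"}
candidates = [
    {"role": "assistant", "content": "Sure. Acknowledge what happened, take responsibility, and suggest a concrete way to make it right."},
    {"role": "assistant", "content": "Just tell them it wasn't a big deal and move on."},
]

scores = [score_conversation([prompt, reply]) for reply in candidates]
best = candidates[scores.index(max(scores))]
print(scores, best["content"])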