# Soprano-RVC: soprano/backends/transformers.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from .base import BaseModel


class TransformersModel(BaseModel):
    def __init__(self,
                 device='cuda',
                 **kwargs):
        self.device = device
        # bfloat16 keeps GPU memory use low; fall back to float32 on CPU.
        self.model = AutoModelForCausalLM.from_pretrained(
            'ekwek/Soprano-80M',
            torch_dtype=torch.bfloat16 if device == 'cuda' else torch.float32,
            device_map=device,
        )
        self.tokenizer = AutoTokenizer.from_pretrained('ekwek/Soprano-80M')
        self.model.eval()
    def infer(self,
              prompts,
              top_p=0.95,
              temperature=0.3,
              repetition_penalty=1.2):
        # Tokenize the batch of prompts, padding and truncating to a shared length.
        inputs = self.tokenizer(
            prompts,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=512,
        ).to(self.device)
        # Sample with hidden states returned so the caller can consume the
        # final-layer representation of each generated token.
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_new_tokens=512,
                do_sample=True,
                top_p=top_p,
                temperature=temperature,
                repetition_penalty=repetition_penalty,
                pad_token_id=self.tokenizer.pad_token_id,
                return_dict_in_generate=True,
                output_hidden_states=True,
            )
        res = []
        eos_token_id = self.model.config.eos_token_id
        for i in range(len(prompts)):
            seq = outputs.sequences[i]
            hidden_states = []
            # outputs.hidden_states holds one entry per generation step; each entry
            # is a tuple of per-layer states, so [j][-1][i, -1, :] is the last
            # layer's hidden state for sample i at step j.
            num_output_tokens = len(outputs.hidden_states)
            for j in range(num_output_tokens):
                # seq.size(0) - num_output_tokens is the prompt length, so this
                # indexes the j-th generated token.
                token = seq[j + seq.size(0) - num_output_tokens]
                if token != eos_token_id:
                    hidden_states.append(outputs.hidden_states[j][-1][i, -1, :])
            last_hidden_state = torch.stack(hidden_states).squeeze()
            finish_reason = 'stop' if seq[-1].item() == eos_token_id else 'length'
            res.append({
                'finish_reason': finish_reason,
                'hidden_state': last_hidden_state,
            })
        return res
    def stream_infer(self,
                     prompt,
                     top_p=0.95,
                     temperature=0.3,
                     repetition_penalty=1.2):
        raise NotImplementedError(
            "The transformers backend does not currently support streaming; "
            "please consider using the lmdeploy backend instead."
        )
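

# Minimal usage sketch (not part of the original file). Because of the relative
# import above, run it as a module, e.g. `python -m soprano.backends.transformers`.
# It assumes BaseModel needs no extra constructor arguments and that a CUDA
# device is available; pass device='cpu' otherwise. The prompt is illustrative.
if __name__ == '__main__':
    backend = TransformersModel(device='cuda')
    results = backend.infer(['Hello, world!'])
    for r in results:
        print(r['finish_reason'], r['hidden_state'].shape)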