Update inference_fine_tune.py
inference_fine_tune.py (+23 -27)
@@ -31,41 +31,37 @@ model.eval()
 state = torch.load(model_path,map_location=torch.device('cpu'))
 model.load_state_dict(state['model_state_dict'])

-def generate_response(prompt:str):
-    print("Prompt
-
-    word = ""
+def generate_response(prompt: str):
+    print("Prompt:", prompt)
     input_tokens = tokenizer.encode(prompt).ids
-    input_tokens
+    input_tokens = [user_token_id] + input_tokens + [ai_token_id]
+
     if len(input_tokens) > config['seq_len']:
-        print(f"
-
-
-
-
-    decoder_input = decoder_input.unsqueeze(0)
+        print(f"Exceeding max length of input: {config['seq_len']}")
+        return
+
+    input_tokens = torch.tensor(input_tokens).unsqueeze(0).to(device) # (1, seq_len)
+
     temperature = 0.7
     top_k = 50
     i = 0
-
-    while
-
-
-        # Get model output
-        out = model.decode(decoder_input)
-        logits = model.project(out[:, -1]) # Get logits for last token
+
+    while input_tokens.shape[1] < 2000:
+        out = model.decode(input_tokens)
+        logits = model.project(out[:, -1])
         logits = logits / temperature
         top_k_logits, top_k_indices = torch.topk(logits, top_k)
         probs = torch.softmax(top_k_logits, dim=-1)
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)
-
-
-
-
-
-
-
+
+        decoded_word = tokenizer.decode([next_token.item()])
+        yield decoded_word # Streaming output token-by-token
+
+        input_tokens = torch.cat([input_tokens, next_token], dim=1)
+        if input_tokens.shape[1] > config['seq_len']:
+            input_tokens = input_tokens[:, -config['seq_len']:]
+
+        if next_token.item() == eos_token_id or i >= 1024:
             break
-
-    return word
+        i += 1
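
For reference, the top-k/temperature sampling step inside the generation loop can be exercised on its own. The sketch below mirrors those lines with made-up logits; the helper name sample_top_k and the toy vocabulary size are illustrative assumptions, not part of this file:

import torch

def sample_top_k(logits: torch.Tensor, temperature: float = 0.7, top_k: int = 50) -> torch.Tensor:
    # Scale logits by temperature, keep only the top-k candidates, and sample one token id.
    logits = logits / temperature
    top_k_logits, top_k_indices = torch.topk(logits, top_k)
    probs = torch.softmax(top_k_logits, dim=-1)
    next_token = torch.multinomial(probs, num_samples=1)  # position within the top-k set
    return top_k_indices.gather(-1, next_token)           # map back to vocabulary ids

# Toy check: a batch of 1 over a 100-token vocabulary yields one sampled id of shape (1, 1).
print(sample_top_k(torch.randn(1, 100)).shape)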
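Because generate_response now yields decoded tokens instead of building and returning a single string, a caller can stream the reply as it is produced. A minimal consumption sketch, assuming the rest of this script (model, tokenizer, and the special-token ids) is already loaded as above; the prompt text is only an example:

for token in generate_response("Hello, how are you?"):
    print(token, end="", flush=True)  # print each decoded token as soon as it is yielded
print()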