Update inference_fine_tune.py
inference_fine_tune.py (+3 -1)
@@ -47,6 +47,7 @@ def generate_response(prompt:str):
     temperature = 0.7
     top_k = 50
     i = 0
+    print("Output : ",end="")
     while decoder_input.shape[1] < 2000:
         # Apply causal mask based on current decoder_input length
         # decoder_mask = (decoder_input != pad_token_id).unsqueeze(0).int() & causal_mask(decoder_input.size(1)).type_as(input_mask).to(device)
@@ -59,11 +60,12 @@ def generate_response(prompt:str):
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)
         word += tokenizer.decode([next_token.item()])
+        print(word,end="")
         i+=1
         decoder_input = torch.cat([decoder_input, next_token], dim=1)
         if decoder_input.shape[1] > config['seq_len']:
             decoder_input = decoder_input[:,-config['seq_len']:]
         if next_token.item() == eos_token_id or i >= 1024:
             break
-    print(
+    print()
     return word
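The second hunk samples from probs and maps the draw back through top_k_indices, but the lines that build those tensors sit between the two hunks and are elided from this diff. Below is a minimal sketch of what a temperature + top-k sampling step of this shape typically looks like; only temperature, top_k, probs, top_k_indices, and the multinomial/gather calls come from the visible code, while the function name sample_next_token and the exact construction are assumptions.

import torch

# Hedged sketch of the sampling step the visible hunk depends on.
# The elided lines are not shown in this diff, so the construction
# below is an assumption, not the script's actual code.
def sample_next_token(logits: torch.Tensor,
                      temperature: float = 0.7,
                      top_k: int = 50) -> torch.Tensor:
    logits = logits / temperature                           # sharpen/flatten the distribution
    top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1)
    probs = torch.softmax(top_k_values, dim=-1)             # probabilities over the k candidates
    next_token = torch.multinomial(probs, num_samples=1)    # index into the top-k list
    return top_k_indices.gather(-1, next_token)             # map back to a vocabulary id

Usage: logits of shape (1, vocab_size) yield a (1, 1) next-token id tensor, matching how next_token is used in the loop.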
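One behavior worth noting in the new streaming output: word accumulates every decoded token, so print(word,end="") re-emits all previously printed text on each iteration. A self-contained sketch of printing only the newly decoded piece instead; the toy decode below is a stand-in for tokenizer.decode, and flush=True is an addition so output appears immediately.

# Toy stand-in for tokenizer.decode, just for this illustration.
def decode(ids):
    return "".join(chr(i) for i in ids)

word = ""
for next_id in [72, 101, 108, 108, 111]:   # stand-in for sampled token ids
    token_text = decode([next_id])
    word += token_text
    # Print only the new token; printing the accumulated `word` here
    # would repeat earlier text on every step.
    print(token_text, end="", flush=True)
print()   # final newline, as in the commit
# Prints: Hello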
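The truncation in the loop keeps generation within the model's context window: once decoder_input grows past config['seq_len'], only the most recent seq_len positions are kept. A small runnable sketch of that slice, where 8 is an arbitrary stand-in for config['seq_len']:

import torch

seq_len = 8                                    # stand-in for config['seq_len']
decoder_input = torch.arange(12).unsqueeze(0)  # running sequence, shape (1, 12)

# Keep only the last seq_len positions, dropping the oldest tokens.
if decoder_input.shape[1] > seq_len:
    decoder_input = decoder_input[:, -seq_len:]

print(decoder_input)  # tensor([[ 4,  5,  6,  7,  8,  9, 10, 11]])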