Spaces:
Sleeping
Sleeping
| import torch | |
| import torch.nn.functional as F | |
| from config import ModelArgs | |
| from model import DeepSeekV3 | |
| from tokenizer import Tokenizer | |
def topk_sampling(model, prompt, device, max_length=50, top_k=50, temperature=1.0, tokenizer=None, hf_token=None):
    """Generate text from ``prompt`` autoregressively using top-k sampling.

    Args:
        model: callable such that ``model(input_ids, inference=True)`` returns
            logits of shape ``(batch, seq_len, vocab_size)``.
        prompt: text to condition generation on.
        device: torch device (or device string) the token ids are moved to.
        max_length: target TOTAL sequence length (prompt + generated tokens).
        top_k: number of highest-probability tokens to sample among.
        temperature: softmax temperature; higher values flatten the distribution.
        tokenizer: optional pre-built tokenizer; when ``None`` the project
            ``Tokenizer`` is constructed (``hf_token`` is forwarded to it).
        hf_token: Hugging Face token used only when building the default tokenizer.

    Returns:
        The decoded string of the full sequence (prompt plus generated tokens).
    """
    if tokenizer is None:
        # Lazily build the project tokenizer only when the caller did not supply one.
        tokenizer = Tokenizer(hf_token=hf_token).ready_tokenizer()

    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    # ``max_length`` is the target TOTAL length, so only the remainder is
    # generated.  BUGFIX: the original subtracted in the wrong direction when
    # the prompt was already >= max_length (it generated prompt_len - max_length
    # tokens); clamp the budget to zero instead.
    num_new_tokens = max(max_length - input_ids.size(1), 0)

    # BUGFIX: autocast previously hard-coded device_type='cuda' even though a
    # ``device`` argument is taken; derive the autocast device type from it.
    device_type = torch.device(device).type

    for _ in range(num_new_tokens):
        with torch.no_grad(), torch.autocast(device_type=device_type, dtype=torch.bfloat16):
            # inference=True selects the model's inference path.
            outputs = model(input_ids, inference=True)

        # Only the logits of the last position matter for the next token.
        logits = outputs[:, -1, :] / temperature
        probs = F.softmax(logits, dim=-1)

        # Top-k filtering.  Clamp k so a vocabulary smaller than ``top_k``
        # does not make torch.topk raise.
        k = min(top_k, probs.size(-1))
        top_k_probs, top_k_indices = torch.topk(probs, k, dim=-1)

        # Sample within the top-k distribution, then map the sampled position
        # back to the real vocabulary id.
        next_token = torch.multinomial(top_k_probs, num_samples=1)
        xcol = torch.gather(top_k_indices, -1, next_token)
        input_ids = torch.cat([input_ids, xcol], dim=1)  # dim=1 is the sequence dimension

        # Stop early on EOS.  BUGFIX: check ``is not None`` so an EOS id of 0
        # (falsy) is still honoured.
        eos_id = getattr(tokenizer, 'eos_token_id', None)
        if eos_id is not None and xcol.item() == eos_id:
            break

    return tokenizer.decode(input_ids[0])
def save_text(file_path, step, text):
    """Write one step's generated text to ``file_path``.

    NOTE(review): mode ``'w'`` truncates the file on every call, so only the
    most recent step's line survives.  If a running log of all steps is the
    intent, this should open with mode ``'a'`` — confirm against the caller
    before changing.

    Args:
        file_path: destination path for the text file.
        step: step number recorded at the start of the line.
        text: the generated text to record.
    """
    # Explicit UTF-8 so output is identical across platforms (the default
    # encoding is platform-dependent and can fail on non-ASCII samples).
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(f"Step {step}: {text}\n")