import torch
import torch.nn.functional as F
from config import ModelArgs
from model import DeepSeekV3
from tokenizer import Tokenizer

def topk_sampling(model, prompt, device, max_length=50, top_k=50, temperature=1.0, tokenizer=None, hf_token=None):
    if tokenizer is None:
        # Use default tokenizer if none provided
        tokenizer_instance = Tokenizer(hf_token=hf_token)
        tokenizer = tokenizer_instance.ready_tokenizer()
        
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    generated_tokens = []
    
    if(len(input_ids[0]) < max_length):
        max_length -= len(input_ids[0]) # If the input is longer than max_length, set max_length to the length of the input
    else:
        max_length = len(input_ids[0]) - max_length  
    for _ in range(max_length):
        with torch.no_grad(), torch.autocast(device_type='cuda', dtype=torch.bfloat16):
            # Pass inference=True to use the inference path in the model
            outputs = model(input_ids, inference=True)
            logits = outputs[:, -1, :]
            logits = logits / temperature
            probs = F.softmax(logits, dim=-1)
            
            # Top-k filtering
            top_k_probs, top_k_indices = torch.topk(probs, top_k, dim=-1)
            
            # Sample from top-k
            next_token = torch.multinomial(top_k_probs, num_samples=1)
            
            xcol = torch.gather(top_k_indices, -1, next_token)
            input_ids = torch.cat([input_ids, xcol], dim=1) #1 because is it the dimension of the sequence
            
            if hasattr(tokenizer, 'eos_token_id') and tokenizer.eos_token_id and xcol.item() == tokenizer.eos_token_id:
                break
            
            
    return tokenizer.decode(input_ids[0])


def save_text(file_path, step, text):
    with open(file_path, 'w') as f:
        f.write(f"Step {step}: {text}\n")