import torch
import gradio as gr
from transformers import AutoTokenizer
# model.py is assumed to define LlamaForCausalLM with the keyword
# arguments used below (the original import listed TransformerModel/config,
# but only LlamaForCausalLM is ever instantiated)
from model import LlamaForCausalLM
# Device and local checkpoint (.pth) saved during training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint_path = "model_bin.pth"
# Load the tokenizer and make sure a pad token is set
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else "[PAD]"
# Initialize model with reduced parameters (135M config)
model = LlamaForCausalLM(
    vocab_size=tokenizer.vocab_size,
    dim=576,
    num_layers=30,
    hidden_dim=1536,
    num_heads=9,
)
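# These dimensions mirror SmolLM-135M: hidden size 576, 30 layers,
# FFN width 1536, 9 attention heads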
# Restore trained weights and switch to inference mode
checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()
def generate_text(prompt, max_length=100, temperature=0.7, top_k=50):
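    """Generate text autoregressively: scale logits by temperature,
    truncate to the top-k candidates, sample, and stop at EOS or max_length."""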
    # Gradio sliders deliver floats; range() and torch.topk need ints
    max_length, top_k = int(max_length), int(top_k)
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        for _ in range(max_length):
            # Model is assumed to return raw logits of shape (batch, seq_len, vocab)
            outputs = model(input_ids)
            next_token_logits = outputs[:, -1, :] / temperature
            # Apply top-k sampling: keep the k most likely tokens, renormalize
            top_k_logits, top_k_indices = torch.topk(next_token_logits, top_k, dim=-1)
            probs = torch.softmax(top_k_logits, dim=-1)
            # Sample from the truncated distribution
            next_token_idx = torch.multinomial(probs, num_samples=1)
            # Map the sampled position back to its vocabulary id
            next_token = top_k_indices[0, next_token_idx[0]]
            if next_token.item() == tokenizer.eos_token_id:
                break
            input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)
    return tokenizer.decode(input_ids[0], skip_special_tokens=True)
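# Quick sanity check from a Python REPL (hypothetical prompt; assumes the
# checkpoint above loaded cleanly):
#   print(generate_text("Once upon a time", max_length=50, temperature=0.8, top_k=40))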
# Gradio interface
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Prompt", lines=3),
        gr.Slider(50, 200, value=100, step=1, label="Max Length"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(10, 100, value=50, step=1, label="Top-k"),
    ],
    outputs=gr.Textbox(label="Generated Text", lines=5),
    title="🦙 Custom SmolLM Demo",
    description="A 135M-parameter language model trained on smollm-corpus",
)
if __name__ == "__main__":
    demo.launch()
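# On a Hugging Face Space this file is expected to run as app.py;
# locally: python app.py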