Spaces:

akhaliq
/

LFM2-2.6B

Running on Zero

App Files Files Community

LFM2-2.6B / app.py

akhaliq HF Staff

Upload app.py with huggingface_hub

3f5e597 verified 3 months ago

raw

history blame contribute delete

1.85 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import spaces

	# Hub checkpoint shared by the tokenizer and the model weights.
	model_id = "LiquidAI/LFM2-2.6B"

	# The two loads are independent of each other; the tokenizer is fetched first.
	tokenizer = AutoTokenizer.from_pretrained(model_id)

	# device_map="auto" leaves device placement to the loader
	# (original author's note: needed for proper device mapping on zero-gpu).
	model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

	def _history_to_messages(history):
	    """Convert Gradio chat history into a list of role/content dicts.

	    Two history layouts are accepted:

	    * pairs: ``[(user_msg, assistant_msg), ...]``
	    * messages: ``[{"role": ..., "content": ...}, ...]``

	    Entries whose content is ``None`` in the pair layout are skipped so that
	    no ``None`` content reaches the chat template.
	    """
	    messages = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            # Already a role/content mapping; keep only the fields the
	            # chat template needs.
	            messages.append({"role": turn["role"], "content": turn["content"]})
	            continue

	        user_msg, assistant_msg = turn
	        for role, content in (("user", user_msg), ("assistant", assistant_msg)):
	            if content is not None:
	                messages.append({"role": role, "content": content})
	    return messages


	@spaces.GPU(duration=120)
	def chat_with_model(message, history):
	    """Generate the model's reply to ``message`` given the earlier ``history``.

	    Args:
	        message: The latest user message.
	        history: Earlier turns of the conversation, either as
	            ``(user, assistant)`` pairs or as role/content dicts.

	    Returns:
	        The decoded text of the newly generated tokens only (the prompt is
	        stripped and special tokens are skipped).
	    """
	    conversation = _history_to_messages(history)
	    conversation.append({"role": "user", "content": message})

	    # return_dict=True yields the attention mask alongside the input ids, so
	    # generate() does not have to infer it while pad_token_id is set to EOS.
	    inputs = tokenizer.apply_chat_template(
	        conversation,
	        add_generation_prompt=True,
	        return_tensors="pt",
	        tokenize=True,
	        return_dict=True,
	    ).to(model.device)
	    input_ids = inputs["input_ids"]

	    # Pass only the tensors generate() needs rather than unpacking every key
	    # the tokenizer may have produced.
	    output = model.generate(
	        input_ids=input_ids,
	        attention_mask=inputs.get("attention_mask"),
	        do_sample=True,
	        temperature=0.3,
	        min_p=0.15,
	        repetition_penalty=1.05,
	        max_new_tokens=512,
	        pad_token_id=tokenizer.eos_token_id,
	    )

	    # The output sequence starts with the prompt; decode only what follows it.
	    prompt_length = input_ids.shape[-1]
	    return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

	# Static UI content for the chat page.
	_TITLE = "LFM2-2.6B Chatbot"
	_DESCRIPTION = "A chatbot powered by LiquidAI/LFM2-2.6B. Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)."
	_EXAMPLE_PROMPTS = [
	    ["What is C. elegans?"],
	    ["Write a short story about a robot who discovers music."],
	    ["Explain the importance of the transformer architecture in NLP."],
	]

	# Wire chat_with_model into a Gradio chat UI and start serving it.
	iface = gr.ChatInterface(
	    fn=chat_with_model,
	    title=_TITLE,
	    description=_DESCRIPTION,
	    theme="soft",
	    examples=_EXAMPLE_PROMPTS,
	)

	iface.launch()