# LFM2-2.6B / app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
# Load model and tokenizer
model_id = "LiquidAI/LFM2-2.6B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # ensure proper device mapping for ZeroGPU
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
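
# A lower-precision load is a common memory optimization for a model this size
# (a sketch; the bfloat16 choice is an assumption, not part of the original app):
#
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id,
#       device_map="auto",
#       torch_dtype=torch.bfloat16,  # roughly halves weight memory vs. float32
#   )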
@spaces.GPU(duration=120)
def chat_with_model(message, history):
    # Rebuild the conversation from the tuple-format history
    # (gr.ChatInterface passes [(user_msg, assistant_msg), ...] by default)
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    # Apply the model's chat template and move the tokens to the model's device
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    # Generate a response
    output = model.generate(
        input_ids,
        do_sample=True,
        temperature=0.3,
        min_p=0.15,
        repetition_penalty=1.05,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response
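
# Optional streaming variant (a sketch, not part of the original app).
# gr.ChatInterface treats generator handlers as streaming, and transformers'
# TextIteratorStreamer yields text as it is produced; swap fn=chat_with_model
# for fn=chat_with_model_stream below to enable it.
from threading import Thread
from transformers import TextIteratorStreamer

@spaces.GPU(duration=120)
def chat_with_model_stream(message, history):
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    # skip_prompt=True so only newly generated text is yielded
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        do_sample=True,
        temperature=0.3,
        min_p=0.15,
        repetition_penalty=1.05,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() blocks, so run it in a background thread and yield partial text
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial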
# Create Gradio interface
iface = gr.ChatInterface(
    fn=chat_with_model,
    title="LFM2-2.6B Chatbot",
    description="A chatbot powered by LiquidAI/LFM2-2.6B. Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder).",
    theme="soft",
    examples=[
        ["What is C. elegans?"],
        ["Write a short story about a robot who discovers music."],
        ["Explain the importance of the transformer architecture in NLP."],
    ],
)
iface.launch()
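
# Usage sketch (assumes the app is running locally on Gradio's default port):
# the chat endpoint can be called from another process via gradio_client.
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   reply = client.predict(message="What is C. elegans?", api_name="/chat")
#   print(reply)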