"""Gradio chat playground that streams replies from LiquidAI LFM2 models."""

from threading import Thread

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Dropdown label -> Hugging Face Hub repository id.
MODEL_NAMES = {
    "LFM2-350M": "LiquidAI/LFM2-350M",
    "LFM2-700M": "LiquidAI/LFM2-700M",
    "LFM2-1.2B": "LiquidAI/LFM2-1.2B",
    "LFM2-2.6B": "LiquidAI/LFM2-2.6B",
    "LFM2-8B-A1B": "LiquidAI/LFM2-8B-A1B",
}

# Instruction text placed at the very start of every prompt.
SYSTEM_PROMPT = (
    "You are LFM2, an intelligent and conversational AI assistant designed to "
    "help users with questions, problem-solving, and creative tasks. "
    "You communicate clearly, reason carefully, and explain your thoughts in "
    "an easy-to-understand way. Stay friendly, professional, and curious. "
    "If the user's request is ambiguous, ask clarifying questions before "
    "proceeding."
)

# model key -> (tokenizer, model); filled lazily by load_model().
model_cache = {}


def load_model(model_key):
    """Return the ``(tokenizer, model)`` pair for *model_key*, loading it once.

    The pair is stored in ``model_cache`` so later calls with the same key
    skip the download/initialisation. The model is moved to CUDA (float16)
    when available, otherwise it stays on CPU (float32).

    Args:
        model_key: One of the keys of ``MODEL_NAMES``.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        KeyError: If *model_key* is not a key of ``MODEL_NAMES``.
    """
    if model_key in model_cache:
        return model_cache[model_key]

    model_name = MODEL_NAMES[model_key]
    print(f"Loading {model_name}...")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)

    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model


def _is_blank(text):
    """Return True when *text* is None or a whitespace-only string."""
    return text is None or (isinstance(text, str) and not text.strip())


def _split_pending_turn(message, history):
    """Return ``(message, prior_history)`` with the pending user turn resolved.

    In the UI wiring the user's turn is appended to the chat history and the
    textbox is cleared *before* the generation step runs, so the generation
    step receives a blank *message* and a history that already ends with the
    user's turn. In that case the last turn is taken as the message and
    removed from the history, so it is not counted twice.
    """
    prior = list(history or [])
    if _is_blank(message) and prior and prior[-1]["role"] == "user":
        return prior[-1]["content"], prior[:-1]
    return message, prior


def _build_prompt(message, history):
    """Render the system prompt, prior turns and the new user message as text."""
    turns = "".join(
        f"{turn['role'].capitalize()}: {turn['content']}\n" for turn in history
    )
    # A blank line keeps the first turn from being glued to the system prompt.
    return f"{SYSTEM_PROMPT}\n\n{turns}User: {message}\nAssistant:"


def chat_with_model(message, history, model_choice):
    """Stream the assistant's reply as successive chat histories.

    Args:
        message: The user's new message. May be blank when the user's turn
            has already been appended to *history* (see
            ``_split_pending_turn``).
        history: Prior conversation as a list of
            ``{"role": ..., "content": ...}`` dicts.
        model_choice: Key of ``MODEL_NAMES`` selecting the model to use.

    Yields:
        The prior history followed by the user turn and the assistant turn,
        where the assistant content grows as new text is generated. If there
        is nothing to answer, the history is yielded once, unchanged.
    """
    message, history = _split_pending_turn(message, history)
    if _is_blank(message):
        # Nothing to answer; skip loading the model and leave the chat as is.
        yield history
        return

    tokenizer, model = load_model(model_choice)
    prompt = _build_prompt(message, history)

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator consumes the streamer and forwards partial text.
    worker = Thread(target=model.generate, kwargs=generation_kwargs)
    worker.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": partial_text},
        ]

    # The streamer is exhausted, so generation is finishing; reap the thread.
    worker.join()


def create_demo():
    """Build and return the Gradio Blocks UI for the chat playground."""
    with gr.Blocks(title="LiquidAI Chat Playground") as demo:
        gr.Markdown("## 💧 LiquidAI Chat Playground")

        model_choice = gr.Dropdown(
            label="Select Model",
            choices=list(MODEL_NAMES.keys()),
            value="LFM2-1.2B",
        )
        chatbot = gr.Chatbot(
            label="Chat with LiquidAI",
            type="messages",
            height=450,
        )
        msg = gr.Textbox(label="Your message", placeholder="Type something...")
        clear = gr.Button("Clear")

        def add_user_message(user_message, chat_history):
            """Clear the textbox and append the user's turn to the history."""
            chat_history = list(chat_history or [])
            if _is_blank(user_message):
                # Ignore empty submissions instead of adding an empty bubble.
                return "", chat_history
            return "", chat_history + [{"role": "user", "content": user_message}]

        # Step 1 shows the user's turn immediately; step 2 streams the reply.
        # By step 2 the textbox is already empty, which chat_with_model
        # handles by reading the pending user turn from the history.
        msg.submit(
            add_user_message, [msg, chatbot], [msg, chatbot], queue=False
        ).then(chat_with_model, [msg, chatbot, model_choice], chatbot)

        clear.click(lambda: [], None, chatbot, queue=False)

    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.queue()
    # Binds to all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)