import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
from threading import Thread
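
# Hugging Face checkpoints for the LFM2 family; the keys are the labels shown in the UI dropdown.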
MODEL_NAMES = {
    "LFM2-350M": "LiquidAI/LFM2-350M",
    "LFM2-700M": "LiquidAI/LFM2-700M",
    "LFM2-1.2B": "LiquidAI/LFM2-1.2B",
    "LFM2-2.6B": "LiquidAI/LFM2-2.6B",
    "LFM2-8B-A1B": "LiquidAI/LFM2-8B-A1B",
}
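
# Cache of (tokenizer, model) pairs so switching models in the UI does not reload weights.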
model_cache = {}


def load_model(model_key):
    """Load and cache the tokenizer/model pair for the selected checkpoint."""
    if model_key in model_cache:
        return model_cache[model_key]
    model_name = MODEL_NAMES[model_key]
    print(f"Loading {model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)
    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model


def chat_with_model(history, model_choice):
    """Stream an assistant reply for the latest user message already present in `history`."""
    tokenizer, model = load_model(model_choice)
    device = model.device
    system_prompt = (
        "You are LFM2, an intelligent and conversational AI assistant designed to help users "
        "with questions, problem-solving, and creative tasks. You communicate clearly, reason "
        "carefully, and explain your thoughts in an easy-to-understand way. Stay friendly, "
        "professional, and curious. If the user's request is ambiguous, ask clarifying "
        "questions before proceeding."
    )
    # Rebuild the conversation as plain role/content dicts and let the tokenizer's
    # chat template format it the way the model expects.
    messages = [{"role": "system", "content": system_prompt}]
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    # Run generation in a background thread so tokens can be yielded as they arrive.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield history + [{"role": "assistant", "content": partial_text}]
    thread.join()
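

# Build the Gradio Blocks UI: model selector, chat window, message box, and clear button.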
def create_demo():
    with gr.Blocks(title="LiquidAI Chat Playground") as demo:
        gr.Markdown("## 💧 LiquidAI Chat Playground")
        model_choice = gr.Dropdown(
            label="Select Model",
            choices=list(MODEL_NAMES.keys()),
            value="LFM2-1.2B",
        )
        chatbot = gr.Chatbot(
            label="Chat with LiquidAI",
            type="messages",
            height=450,
        )
        msg = gr.Textbox(label="Your message", placeholder="Type something...")
        clear = gr.Button("Clear")

        def add_user_message(user_message, chat_history):
            # Append the user turn and clear the textbox before generation starts.
            return "", chat_history + [{"role": "user", "content": user_message}]

        # First append the user message, then stream the assistant reply into the chatbot.
        msg.submit(add_user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
            chat_with_model, [chatbot, model_choice], chatbot
        )
        clear.click(lambda: [], None, chatbot, queue=False)
    return demo
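

# Running the script serves the app on all interfaces at port 7860; pass share=True
# to launch() below if a temporary public Gradio link is needed.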
if __name__ == "__main__":
    demo = create_demo()
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)