# Hugging Face Space status: Running on Zero (ZeroGPU)
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import spaces | |
# Checkpoint served by this app; the tokenizer and the weights are both
# resolved from the same identifier.
model_id = "LiquidAI/LFM2-2.6B"

# The tokenizer load does not depend on the model object, so it goes first.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto" leaves device placement to the loader instead of
# hard-coding a device here.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
def _message_text(content):
    """Return the textual part of one chat message's content, or "" if none."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Structured content: keep plain strings and the "text" field of
        # dict parts; anything else (files, images) is dropped because the
        # prompt built here is text-only.
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)
    return ""


def _build_conversation(message, history):
    """Convert chat history plus the new message into role/content dicts.

    Accepts both history layouts: a sequence of ``(user, assistant)`` pairs
    and a sequence of ``{"role": ..., "content": ...}`` dicts. Turns with no
    text (for example a ``None`` assistant reply) are skipped.
    """
    conversation = []
    for entry in history or []:
        if isinstance(entry, dict):
            turns = [(entry.get("role"), entry.get("content"))]
        else:
            user_msg, assistant_msg = entry
            turns = [("user", user_msg), ("assistant", assistant_msg)]
        for role, content in turns:
            text = _message_text(content)
            if role and text:
                conversation.append({"role": role, "content": text})
    conversation.append({"role": "user", "content": message})
    return conversation


@spaces.GPU  # request a ZeroGPU device for the duration of each call
def chat_with_model(message, history):
    """Generate the assistant's reply to ``message`` given the chat history.

    Args:
        message: The new user message (text).
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role", "content"}`` dicts; may be empty or ``None``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped and special tokens are skipped).
    """
    conversation = _build_conversation(message, history)

    # return_dict=True yields input_ids together with the attention mask, so
    # generate() does not have to guess the mask while pad_token_id is set to
    # the EOS token id.
    inputs = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    output = model.generate(
        **inputs,
        do_sample=True,
        temperature=0.3,
        min_p=0.15,
        repetition_penalty=1.05,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Slice off the prompt so only the newly generated tokens are decoded.
    response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    return response
# Starter prompts offered in the chat UI.
example_prompts = (
    "What is C. elegans?",
    "Write a short story about a robot who discovers music.",
    "Explain the importance of the transformer architecture in NLP.",
)

# Chat UI wired to chat_with_model; each example is passed as a
# single-element list.
iface = gr.ChatInterface(
    fn=chat_with_model,
    title="LFM2-2.6B Chatbot",
    description="A chatbot powered by LiquidAI/LFM2-2.6B. Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder).",
    theme="soft",
    examples=[[prompt] for prompt in example_prompts],
)

# Start serving the interface.
iface.launch()