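"""Multi-Model Tiny Chatbot: a Gradio Space that lazy-loads four small
language models (SmolLM2-135M, NanoLM-25M, and two NanoTranslator sizes)
and routes each chat message to whichever model the user selects."""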
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
class MultiModelChat:
    # Hugging Face Hub repo IDs for each model offered in the dropdown.
    MODEL_IDS = {
        'SmolLM2': "HuggingFaceTB/SmolLM2-135M-Instruct",
        'NanoLM-25M': "Mxode/NanoLM-25M-Instruct-v1.1",
        'NanoTranslator-S': "Mxode/NanoTranslator-S",
        'NanoTranslator-XL': "Mxode/NanoTranslator-XL",
    }

    def __init__(self):
        self.models = {}

    def ensure_model_loaded(self, model_name):
        """Lazy load a model only when needed."""
        if model_name not in self.models:
            print(f"Loading {model_name} model...")
            repo_id = self.MODEL_IDS[model_name]
            self.models[model_name] = {
                'tokenizer': AutoTokenizer.from_pretrained(repo_id),
                'model': AutoModelForCausalLM.from_pretrained(repo_id),
            }
            # Set a pad token for the newly loaded model if it lacks one,
            # so generate() can pad with EOS instead of raising an error.
            tokenizer = self.models[model_name]['tokenizer']
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            print(f"{model_name} model loaded successfully!")
    def chat(self, message, history, model_choice):
        if model_choice == "SmolLM2":
            return self.chat_smol(message, history)
        elif model_choice == "NanoLM-25M":
            return self.chat_nanolm(message, history)
        elif model_choice == "NanoTranslator-S":
            return self.chat_translator(message, history)
        elif model_choice == "NanoTranslator-XL":
            return self.chat_translator_xl(message, history)
        return "Unknown model selected."
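    # SmolLM2-135M-Instruct also provides a chat template; this demo uses a
    # plain "User:/Assistant:" prompt and splits the reply out of the text.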
    def chat_smol(self, message, history):
        self.ensure_model_loaded('SmolLM2')
        tokenizer = self.models['SmolLM2']['tokenizer']
        model = self.models['SmolLM2']['model']
        inputs = tokenizer(f"User: {message}\nAssistant:", return_tensors="pt")
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=80,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Everything after the final "Assistant:" marker is the new reply.
        return response.split("Assistant:")[-1].strip()
    def chat_nanolm(self, message, history):
        self.ensure_model_loaded('NanoLM-25M')
        tokenizer = self.models['NanoLM-25M']['tokenizer']
        model = self.models['NanoLM-25M']['model']
        # Use the tokenizer's chat template for NanoLM
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": message}
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        inputs = tokenizer([text], return_tensors="pt")
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        # Strip the prompt tokens so only the newly generated reply is decoded.
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, outputs)
        ]
        return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    def chat_translator(self, message, history):
        return self._translate('NanoTranslator-S', message)

    def chat_translator_xl(self, message, history):
        return self._translate('NanoTranslator-XL', message)

    def _translate(self, model_name, message):
        """Shared generation path for both NanoTranslator variants."""
        self.ensure_model_loaded(model_name)
        tokenizer = self.models[model_name]['tokenizer']
        model = self.models[model_name]['model']
        # NanoTranslator prompt format: the English source text wrapped in
        # <|im_start|> ... <|endoftext|> markers.
        prompt = f"<|im_start|>{message}<|endoftext|>"
        inputs = tokenizer([prompt], return_tensors="pt")
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=100,
            temperature=0.55,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        # Drop the prompt tokens so only the translation itself is decoded.
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, outputs)
        ]
        return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
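# One module-level instance, so models loaded once persist across requests.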
chat_app = MultiModelChat()

def respond(message, history, model_choice):
    return chat_app.chat(message, history, model_choice)
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 🤗 Multi-Model Tiny Chatbot")
    gr.Markdown("*Lightweight AI models for different tasks - Choose the right model for your needs!*")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=["SmolLM2", "NanoLM-25M", "NanoTranslator-S", "NanoTranslator-XL"],
            value="NanoLM-25M",
            label="Select Model",
            info="Choose the best model for your task"
        )

    # Model information display
    with gr.Row():
        model_info = gr.Markdown(
            """
            ## 🚀 NanoLM-25M (25M) - Selected
            **Best for:** Quick responses, simple tasks, resource-constrained environments
            **Language:** English
            **Memory:** ~100MB
            **Speed:** Very Fast

            💡 **Tip:** Ultra-lightweight model perfect for fast responses!
            """,
            visible=True
        )

    chatbot = gr.Chatbot(height=400, show_label=False)
    msg = gr.Textbox(
        label="Message",
        placeholder="Type your message here...",
        lines=2
    )

    with gr.Row():
        clear = gr.Button("🗑️ Clear Chat", variant="secondary")
        submit = gr.Button("💬 Send", variant="primary")
    # Usage tips
    with gr.Accordion("📚 Model Usage Guide", open=False):
        gr.Markdown("""
        ### 🎯 When to use each model:

        **🔵 SmolLM2 (135M)**
        - General conversations and questions
        - Creative writing tasks
        - Coding help and explanations
        - Educational content

        **🟢 NanoLM-25M (25M)**
        - Quick responses when speed matters
        - Resource-constrained environments
        - Simple Q&A tasks
        - Mobile or edge deployment

        **🔴 NanoTranslator-S (9M)**
        - Fast English → Chinese translation
        - Basic translation needs
        - Ultra-low memory usage
        - Real-time translation

        **🟡 NanoTranslator-XL (78M)**
        - High-quality English → Chinese translation
        - Professional translation work
        - Complex sentences and idioms
        - Better context understanding

        ### 💡 Pro Tips:
        - Models load automatically when first selected (lazy loading)
        - Translation models work best with clear, complete sentences
        - For translation, input English text and get Chinese output
        - Restart the app to free up memory from unused models
        """)
    def update_model_info(model_choice):
        info_map = {
            "SmolLM2": """
            ## 🚀 SmolLM2 (135M) - Selected
            **Best for:** General conversation, Q&A, creative writing, coding help
            **Language:** English
            **Memory:** ~500MB
            **Speed:** Fast

            💡 **Tip:** Great all-around model for most conversational tasks!
            """,
            "NanoLM-25M": """
            ## 🚀 NanoLM-25M (25M) - Selected
            **Best for:** Quick responses, simple tasks, resource-constrained environments
            **Language:** English
            **Memory:** ~100MB
            **Speed:** Very Fast

            💡 **Tip:** Ultra-lightweight model perfect for fast responses!
            """,
            "NanoTranslator-S": """
            ## 🚀 NanoTranslator-S (9M) - Selected
            **Best for:** Fast English → Chinese translation
            **Language:** English → Chinese
            **Memory:** ~50MB
            **Speed:** Very Fast

            💡 **Tip:** Input English text to get Chinese translation. Great for quick translations!
            """,
            "NanoTranslator-XL": """
            ## 🚀 NanoTranslator-XL (78M) - Selected
            **Best for:** High-quality English → Chinese translation
            **Language:** English → Chinese
            **Memory:** ~300MB
            **Speed:** Fast

            💡 **Tip:** Best translation quality for complex sentences and professional use!
            """
        }
        return info_map.get(model_choice, "")
    # Update model info when dropdown changes
    model_dropdown.change(
        update_model_info,
        inputs=[model_dropdown],
        outputs=[model_info]
    )
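    # Two-step submit flow: user_message appends the new turn (and clears the
    # textbox), then bot_message generates and fills in the model's reply.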
    def user_message(message, history):
        return "", history + [[message, None]]

    def bot_message(history, model_choice):
        user_msg = history[-1][0]
        bot_response = chat_app.chat(user_msg, history[:-1], model_choice)
        history[-1][1] = bot_response
        return history
    # Handle message submission
    msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
        bot_message, [chatbot, model_dropdown], chatbot
    )
    submit.click(user_message, [msg, chatbot], [msg, chatbot]).then(
        bot_message, [chatbot, model_dropdown], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
demo.launch()