# app.py - Hugging Face Space Application
# This creates a chat interface for your fine-tuned model

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Configuration
BASE_MODEL = "microsoft/phi-2"
ADAPTER_MODEL = "sepo25/my-finetuned-model"

# Global variables to store the loaded model and tokenizer
model = None
tokenizer = None


def load_model():
    """Load the base model and fine-tuned adapter."""
    global model, tokenizer

    print("Loading model... This may take a minute.")

    # Load tokenizer; phi-2 has no pad token, so reuse EOS
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token

    # Load base model in half precision, letting accelerate place it on
    # whatever device is available (GPU if the Space has one, else CPU)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    # Attach the fine-tuned LoRA adapter on top of the base model
    model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)

    print("Model loaded successfully!")
    return model, tokenizer


def generate_response(message, chat_history, temperature=0.7, max_tokens=200):
    """Generate a response from the model."""
    global model, tokenizer

    # Lazy-load on first request so the Space starts up quickly
    if model is None or tokenizer is None:
        load_model()

    # Format the prompt to match the instruction template used in fine-tuning
    prompt = f"### Instruction:\n{message}\n\n### Response:\n"

    # Tokenize and move the input tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate a response (no gradients needed at inference time)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence (prompt + completion)
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the completion, i.e. the text after "### Response:"
    if "### Response:" in full_response:
        response = full_response.split("### Response:")[-1].strip()
    else:
        response = full_response

    # Append the (user, bot) turn to the chat history and clear the textbox
    chat_history.append((message, response))
    return "", chat_history


def clear_chat():
    """Clear the chat history and the input box."""
    # First value resets the Chatbot, second resets the Textbox,
    # matching the outputs=[chatbot, msg] wiring below
    return [], ""


# Create the Gradio interface
with gr.Blocks(title="My Fine-tuned Model Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🤖 Chat with My Fine-tuned Model

        This model has been fine-tuned on custom text data and can answer
        questions and provide summaries.

        **Tips:**
        - Ask questions about the content it was trained on
        - Request summaries of information
        - Be specific in your questions for best results
        """
    )

    # Chat history displayed as (user, bot) message pairs
    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        show_label=True,
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your question here...",
            scale=4,
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature (higher = more creative)",
        )
        max_tokens = gr.Slider(
            minimum=50,
            maximum=500,
            value=200,
            step=50,
            label="Max tokens (response length)",
        )

    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear Chat")

    gr.Markdown(
        """
        ---
        ### Example Questions:
        - "What is this text about?"
        - "Summarize the main points"
        - "Tell me about [specific topic from your text]"
        """
    )

    # Event handlers: both the Send button and Enter in the textbox
    # trigger generation; the returned "" clears the textbox
    submit_btn.click(
        generate_response,
        inputs=[msg, chatbot, temperature, max_tokens],
        outputs=[msg, chatbot],
    )
    msg.submit(
        generate_response,
        inputs=[msg, chatbot, temperature, max_tokens],
        outputs=[msg, chatbot],
    )
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
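
# ---------------------------------------------------------------------------
# Deployment note: a Hugging Face Space installs Python dependencies from a
# requirements.txt next to app.py. A minimal sketch based on the imports
# above (exact version pins are left as an exercise; `accelerate` is needed
# because device_map="auto" relies on it for device placement):
#
#   gradio
#   torch
#   transformers
#   peft
#   accelerate
# ---------------------------------------------------------------------------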