# app.py - Hugging Face Space Application
# This creates a chat interface for your fine-tuned model
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
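# Dependency note (an assumption, not pinned by this file): the Space's
# requirements.txt is expected to include gradio, torch, transformers, peft,
# and accelerate (device_map="auto" below relies on accelerate).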
# Configuration
BASE_MODEL = "microsoft/phi-2"
ADAPTER_MODEL = "sepo25/my-finetuned-model"
# Global variables to store loaded model
model = None
tokenizer = None
def load_model():
    """Load the base model and attach the fine-tuned adapter."""
    global model, tokenizer
    print("Loading model... This may take a minute.")

    # Load tokenizer; phi-2 has no pad token, so reuse the EOS token
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token

    # Load the base model in half precision and let accelerate place it
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    # Wrap the base model with the fine-tuned adapter weights
    model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
    print("Model loaded successfully!")
    return model, tokenizer
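# Hardware note: float16 assumes a GPU. On a CPU-only Space, a common fallback
# (an assumption about your hardware, not part of the original setup) is to
# load in full precision instead:
#
#     base_model = AutoModelForCausalLM.from_pretrained(
#         BASE_MODEL,
#         torch_dtype=torch.float32,  # float16 is poorly supported on CPU
#         trust_remote_code=True,
#     )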
def generate_response(message, chat_history, temperature=0.7, max_tokens=200):
    """Generate a response from the model"""
    global model, tokenizer

    # Lazy-load the model on the first request
    if model is None or tokenizer is None:
        load_model()

    # Format the prompt in the Alpaca-style instruction format used here
    prompt = f"### Instruction:\n{message}\n\n### Response:\n"

    # Tokenize and move the inputs to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate a response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode the full generated sequence
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text after the "### Response:" marker
    if "### Response:" in full_response:
        response = full_response.split("### Response:")[-1].strip()
    else:
        response = full_response

    # Append the exchange to the chat history and clear the input box
    chat_history.append((message, response))
    return "", chat_history
def clear_chat():
    """Clear the chat history and the input box"""
    return [], ""
# Create Gradio interface
with gr.Blocks(title="My Fine-tuned Model Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🤖 Chat with My Fine-tuned Model

        This model has been fine-tuned on custom text data and can answer questions and provide summaries.

        **Tips:**
        - Ask questions about the content it was trained on
        - Request summaries of information
        - Be specific in your questions for best results
        """
    )

    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        show_label=True
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your question here...",
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature (higher = more creative)",
        )
        max_tokens = gr.Slider(
            minimum=50,
            maximum=500,
            value=200,
            step=50,
            label="Max tokens (response length)",
        )

    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear Chat")

    gr.Markdown(
        """
        ---
        ### Example Questions:
        - "What is this text about?"
        - "Summarize the main points"
        - "Tell me about [specific topic from your text]"
        """
    )

    # Event handlers
    submit_btn.click(
        generate_response,
        inputs=[msg, chatbot, temperature, max_tokens],
        outputs=[msg, chatbot]
    )
    msg.submit(
        generate_response,
        inputs=[msg, chatbot, temperature, max_tokens],
        outputs=[msg, chatbot]
    )
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg]
    )
# Launch the app
if __name__ == "__main__":
    demo.launch()
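# When running outside a Space (e.g. locally or in a notebook), passing
# share=True creates a temporary public link: demo.launch(share=True).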