import gradio as gr
import torch
# Import unsloth before any transformers import so its runtime patches take effect
from unsloth import FastLanguageModel
# Define model and tokenizer loading parameters
MODEL_NAME = "unsloth/llama-3-8b-Instruct-bnb-4bit"
max_seq_length = 2048
load_in_4bit = True
dtype = None  # None = auto-detect (float16 on T4/V100, bfloat16 on Ampere+)
# Load the base model and then the fine-tuned LoRA adapter
model, tokenizer = FastLanguageModel.from_pretrained(
model_name=MODEL_NAME,
max_seq_length=max_seq_length,
dtype=dtype,
load_in_4bit=load_in_4bit,
)
# Attach the fine-tuned LoRA adapter; the 'hplc_lora' directory must be present
# in the working directory of the deployed app
model.load_adapter("hplc_lora")
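# Alternative (a sketch, assuming 'hplc_lora' was saved via save_pretrained and
# contains its adapter_config.json): Unsloth can resolve base model + adapter in one call:
#   model, tokenizer = FastLanguageModel.from_pretrained(
#       model_name="hplc_lora", max_seq_length=max_seq_length, load_in_4bit=True
#   )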
# Llama-3 ships without a pad token; reuse EOS so generate() has a valid pad id
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
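# Switch on Unsloth's optimized inference path (its documented fast-generate mode)
FastLanguageModel.for_inference(model)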
# --- Simple inference function using your fine-tuned model ---
def hplc_chat(user_prompt):
system_prompt = (
"You are an expert analytical chemist specializing in HPLC method development "
"and troubleshooting. Give concise, step-by-step actions with reasoning, "
"numeric targets, and acceptance criteria."
)
    # Build a Llama-3 chat prompt by hand; every turn must be closed with <|eot_id|>
    text = (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"{system_prompt}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{user_prompt}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=400,
            do_sample=True,  # required; temperature/top_p are ignored under greedy decoding
            temperature=0.4,
            top_p=0.9,
            # Stop on either end-of-text or Llama-3's end-of-turn token
            eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")],
            pad_token_id=tokenizer.pad_token_id,
        )
    # Decode only the newly generated tokens; splitting the full decode on the
    # word "assistant" breaks whenever that word appears in the answer itself
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return answer.strip()
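# Quick smoke test (hypothetical question; uncomment to try without the UI):
# print(hplc_chat("Retention times drift >5% between injections. What should I check?"))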
# --- Build the Gradio interface ---
demo = gr.Interface(
fn=hplc_chat,
inputs=gr.Textbox(label="Enter your HPLC question or problem", lines=5, placeholder="e.g., Retention time drift >5%"),
outputs=gr.Textbox(label="Model response", lines=12),
title="🧪 Llama-3 HPLC Method Development & Troubleshooting",
description="Ask about gradient setup, peak shape, pressure, or column issues. The model will suggest step-by-step fixes and acceptance criteria.",
)
# Launch the app; share=True creates a public link when run locally
# (Hugging Face Spaces ignores the flag and hosts the app itself)
if __name__ == "__main__":
    demo.launch(share=True)