# Unsloth should be imported before transformers/torch so its patches apply.
from unsloth import FastLanguageModel

import gradio as gr
import torch

# Define model and tokenizer loading parameters
MODEL_NAME = "unsloth/llama-3-8b-Instruct-bnb-4bit"
max_seq_length = 2048
load_in_4bit = True
dtype = None  # None lets Unsloth auto-detect (float16 or bfloat16 per GPU)

# Load the 4-bit quantized base model and its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Load the fine-tuned LoRA adapter from the saved directory.
# This assumes 'hplc_lora' is in the current working directory
# or in the directory where the app will be deployed.
model.load_adapter("hplc_lora")

# Switch Unsloth into its optimized inference mode
FastLanguageModel.for_inference(model)

# Ensure the tokenizer pad_token is set for generation
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Llama-3 Instruct ends each turn with <|eot_id|> rather than the plain EOS
# token, so generation must be allowed to stop on either.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]


# --- Simple inference function using the fine-tuned model ---
def hplc_chat(user_prompt):
    system_prompt = (
        "You are an expert analytical chemist specializing in HPLC method development "
        "and troubleshooting. Give concise, step-by-step actions with reasoning, "
        "numeric targets, and acceptance criteria."
    )

    # Build the prompt with the tokenizer's own chat template, which inserts
    # the <|start_header_id|>/<|eot_id|> markers in the correct places.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=400,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.4,
            top_p=0.9,
            eos_token_id=terminators,  # stop on <|eot_id|> or EOS
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# --- Build the Gradio interface ---
demo = gr.Interface(
    fn=hplc_chat,
    inputs=gr.Textbox(
        label="Enter your HPLC question or problem",
        lines=5,
        placeholder="e.g., Retention time drift >5%",
    ),
    outputs=gr.Textbox(label="Model response", lines=12),
    title="🧪 Llama-3 HPLC Method Development & Troubleshooting",
    description=(
        "Ask about gradient setup, peak shape, pressure, or column issues. "
        "The model will suggest step-by-step fixes and acceptance criteria."
    ),
)

# Launch the Gradio app when the script is run directly
if __name__ == "__main__":
    demo.launch(share=True)
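
# --- Optional smoke test (a minimal sketch; the filename and the sample
# question below are illustrative assumptions, not part of the original app) ---
# Before deploying, it can help to verify that the adapter loads and the model
# responds end to end from a REPL, without launching the Gradio UI:
#
#   from app import hplc_chat  # assumes this script is saved as app.py
#   print(hplc_chat("Tailing factor is 2.5 on a C18 column; what should I check?"))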