import gradio as gr
import torch
# Import unsloth before any transformers import so its runtime patches take effect
from unsloth import FastLanguageModel
# Define model and tokenizer loading parameters
MODEL_NAME = "unsloth/llama-3-8b-Instruct-bnb-4bit"
max_seq_length = 2048
load_in_4bit = True
dtype = None  # None = auto-detect (float16 on T4/V100, bfloat16 on Ampere+)
# Load the base model and then the fine-tuned LoRA adapter
model, tokenizer = FastLanguageModel.from_pretrained(
model_name=MODEL_NAME,
max_seq_length=max_seq_length,
dtype=dtype,
load_in_4bit=load_in_4bit,
)
# Attach the fine-tuned LoRA adapter; the 'hplc_lora' directory must be present
# in the working directory of the deployed app
model.load_adapter("hplc_lora")
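# Alternative (a sketch, assuming 'hplc_lora' was saved via save_pretrained and
# contains its adapter_config.json): Unsloth can resolve base model + adapter in one call:
#   model, tokenizer = FastLanguageModel.from_pretrained(
#       model_name="hplc_lora", max_seq_length=max_seq_length, load_in_4bit=True
#   )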
# Llama-3 ships without a pad token; reuse EOS so generate() has a valid pad id
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
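# Switch on Unsloth's optimized inference path (its documented fast-generate mode)
FastLanguageModel.for_inference(model)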
# --- Simple inference function using your fine-tuned model ---
def hplc_chat(user_prompt):
system_prompt = (
"You are an expert analytical chemist specializing in HPLC method development "
"and troubleshooting. Give concise, step-by-step actions with reasoning, "
"numeric targets, and acceptance criteria."
)
    # Build a Llama-3 chat prompt by hand; every turn must be closed with <|eot_id|>
    text = (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"{system_prompt}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{user_prompt}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=400,
            do_sample=True,  # required; temperature/top_p are ignored under greedy decoding
            temperature=0.4,
            top_p=0.9,
            # Stop on either end-of-text or Llama-3's end-of-turn token
            eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")],
            pad_token_id=tokenizer.pad_token_id,
        )
    # Decode only the newly generated tokens; splitting the full decode on the
    # word "assistant" breaks whenever that word appears in the answer itself
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return answer.strip()
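# Quick smoke test (hypothetical question; uncomment to try without the UI):
# print(hplc_chat("Retention times drift >5% between injections. What should I check?"))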
# --- Build the Gradio interface ---
demo = gr.Interface(
fn=hplc_chat,
inputs=gr.Textbox(label="Enter your HPLC question or problem", lines=5, placeholder="e.g., Retention time drift >5%"),
outputs=gr.Textbox(label="Model response", lines=12),
title="🧪 Llama-3 HPLC Method Development & Troubleshooting",
description="Ask about gradient setup, peak shape, pressure, or column issues. The model will suggest step-by-step fixes and acceptance criteria.",
)
# Launch the app; share=True creates a public link when run locally
# (Hugging Face Spaces ignores the flag and hosts the app itself)
if __name__ == "__main__":
    demo.launch(share=True)