import os
import zipfile

import gradio as gr
import chromadb
import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

# 🔹 Load API Token from Hugging Face Secrets
HF_TOKEN = os.getenv("api_key")  # ✅ Securely load API key (stored under the `api_key` secret)

# 🔹 Ensure API Token is Loaded
if HF_TOKEN is None:
    raise ValueError("❌ Hugging Face API token not found. Add an `api_key` secret to the Space.")

# 🔹 Load Mistral-7B-Instruct for LLM Responses (gated model, requires authentication)
llm_name = "mistralai/Mistral-7B-Instruct-v0.1"
llm_tokenizer = AutoTokenizer.from_pretrained(llm_name, token=HF_TOKEN)
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=HF_TOKEN
)
# 🔹 Optimize Mistral for Faster Inference
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True
llm_model = torch.compile(llm_model)
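# (Hedged note) torch.compile is only available on PyTorch 2.x; if this Space could run on an
# older runtime, the call above can be guarded as in this sketch without changing the output:
#
#     if hasattr(torch, "compile"):
#         llm_model = torch.compile(llm_model)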
# 🔹 Initialize ChromaDB

# 🔹 Unzip ChromaDB database if not extracted
if not os.path.exists("./chroma_db"):
    with zipfile.ZipFile("chroma_db.zip", 'r') as zip_ref:
        zip_ref.extractall("./")
    print("✅ ChromaDB database loaded!")

# 🔹 Load ChromaDB from local storage
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(name="hepB_knowledge")
print("✅ ChromaDB initialized!")
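# (Hedged sketch) Optional sanity check that the unzipped database actually contains indexed
# chunks; collection.count() is part of the ChromaDB collection API:
#
#     print(f"Collection holds {collection.count()} chunks")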
# 🔹 Function to Generate LLM Responses

# 🔹 Detect Device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"✅ Using device: {device}")

def generate_humanized_response(query, retrieved_text):
    """Passes retrieved chunks through Mistral-7B to improve readability."""
    # 🔹 Truncate retrieved text to avoid long input errors
    retrieved_text = retrieved_text[:500]

    prompt = f"""You are a medical assistant. Answer the following question based on the retrieved text:
Retrieved Text:
{retrieved_text}
User Query: {query}
Provide a well-structured, human-like response:
"""

    inputs = llm_tokenizer(prompt, return_tensors="pt").to(device)  # ✅ Uses GPU if available, otherwise CPU
    output = llm_model.generate(**inputs, max_new_tokens=150, do_sample=True)
    response = llm_tokenizer.decode(output[0], skip_special_tokens=True)
    return response
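# (Hedged sketch) Mistral-7B-Instruct was tuned on an [INST] ... [/INST] chat format. If answers
# come back poorly structured, the prompt can be built through the tokenizer's chat template
# instead of a raw string (assumes a transformers version with chat-template support):
#
#     messages = [{"role": "user", "content": prompt}]
#     chat_prompt = llm_tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     inputs = llm_tokenizer(chat_prompt, return_tensors="pt").to(device)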
# 🔹 Load BioMedBERT for Embeddings
embed_model_name = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"
embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
embed_model = AutoModel.from_pretrained(embed_model_name)
embed_model.to(device)

# 🔹 Function to Generate Text Embeddings
def get_embedding(text):
    """Generates BioMedBERT embeddings using the CLS token (max 512 tokens)."""
    inputs = embed_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512
    ).to(device)
    with torch.no_grad():
        outputs = embed_model(**inputs)
    cls_embedding = outputs.last_hidden_state[:, 0, :].cpu()  # Move back to CPU
    return cls_embedding.squeeze().numpy().tolist()
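# (Hedged sketch) This Space only reads the prebuilt chroma_db.zip; the collection is assumed to
# have been populated offline along these lines, where `chunks` is a hypothetical list of text
# passages taken from the WHO guidelines:
#
#     chunks = ["Hepatitis B is a viral infection that attacks the liver...", "..."]
#     collection.add(
#         ids=[f"chunk-{i}" for i in range(len(chunks))],
#         documents=chunks,
#         embeddings=[get_embedding(c) for c in chunks],
#     )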
# 🔹 Function to Retrieve Similar Chunks
def retrieve_similar_chunks(query, top_k=5, similarity_threshold=0.5):
    """Finds top-k similar chunks from ChromaDB using cosine similarity."""
    print("🔹 Generating embedding for query...")
    query_embedding = get_embedding(query)

    print("🔹 Querying ChromaDB...")
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k
    )

    # ✅ Check if results are empty before accessing scores (Chroma nests results per query)
    documents = results["documents"][0] if results["documents"] else []
    distances = results["distances"][0] if results["distances"] else []
    if not documents:
        print("❌ No relevant chunks found in ChromaDB.")
        return ["Sorry, I couldn't find relevant information."]

    print(f"🔹 Retrieved {len(documents)} chunks from ChromaDB.")

    # 🔍 Print similarity scores (ChromaDB returns distances: lower = more similar)
    for i, dist in enumerate(distances):
        print(f"Chunk {i+1} Distance: {dist}")

    # 🔍 Filter out low-score chunks (assumes a cosine-distance collection, so similarity = 1 - distance)
    filtered_results = [
        doc for doc, dist in zip(documents, distances)
        if (1 - dist) >= similarity_threshold
    ]

    print("✅ Retrieval completed.")
    return filtered_results if filtered_results else ["Sorry, I couldn't find relevant information."]
# 🔹 Chatbot Function
def chatbot(query):
    """Returns a structured and human-like answer using Mistral-7B."""
    retrieved_chunks = retrieve_similar_chunks(query)
    if not retrieved_chunks or retrieved_chunks == ["Sorry, I couldn't find relevant information."]:
        return "Sorry, I couldn't find relevant information."
    retrieved_texts = [chunk if isinstance(chunk, str) else " ".join(chunk) for chunk in retrieved_chunks]
    retrieved_text = "\n\n".join(retrieved_texts)[:500]
    response = generate_humanized_response(query, retrieved_text)
    return response
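# (Hedged sketch) Quick local smoke test of the full retrieve-then-generate path before starting
# the UI; the question is illustrative only:
#
#     print(chatbot("How is hepatitis B transmitted?"))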
# 🔹 Gradio Chat Interface
ui = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Ask about Hepatitis B..."),
    outputs=gr.Textbox(),
    title="💡 Hepatitis B Chatbot",
    description="⚕️ Ask questions based on WHO Hepatitis B guidelines (2024). Uses ChromaDB & Mistral-7B for responses.",
)

# 🔥 Run the Chatbot
if __name__ == "__main__":
    ui.launch()