import gradio as gr
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Load abstracts
df = pd.read_csv("abstracts.csv")
abstracts = df["abstract"].tolist()

# Load embedding model and embed all abstracts
embedder = SentenceTransformer("all-MiniLM-L6-v2")
abstract_embeddings = embedder.encode(abstracts, show_progress_bar=True)

# Build FAISS index (flat L2 search over the embedding dimension)
index = faiss.IndexFlatL2(abstract_embeddings.shape[1])
index.add(abstract_embeddings)

# Load LLM from Hugging Face Hub
llm = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    max_new_tokens=300,
)

def verify_claim(claim):
    # Embed the claim and retrieve the 3 nearest abstracts as evidence
    query_vec = embedder.encode([claim])
    D, I = index.search(query_vec, 3)
    top_abstracts = df.iloc[I[0]]["abstract"].tolist()
    context = "\n".join(top_abstracts)

    prompt = (
        f"Claim: {claim}\n\n"
        f"Evidence:\n{context}\n\n"
        "Answer True, False, or Uncertain. Then explain why:\n"
    )
    # return_full_text=False keeps the prompt from being echoed back in the output
    output = llm(prompt, return_full_text=False)[0]["generated_text"]

    return f"šŸ” **Top Abstracts:**\n{context}\n\n🧠 **LLM Response:**\n{output}"

# Gradio UI
gr.Interface(
    fn=verify_claim,
    inputs=gr.Textbox(label="Enter a scientific claim"),
    outputs=gr.Markdown(),
    title="šŸ”¬ Scientific Claim Verifier",
    description="Checks the validity of a scientific claim using PubMed abstracts + an LLM",
).launch()
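# ------------------------------------------------------------------
# Optional: a minimal sketch of the expected input, assuming only what the
# pd.read_csv call above implies (a CSV named abstracts.csv with an
# "abstract" column). The two rows below are hypothetical placeholders;
# replace them with your real PubMed abstracts. Run this once, in a
# separate session, before launching the app.
#
#   pip install gradio pandas faiss-cpu sentence-transformers transformers
#
#   import pandas as pd
#   pd.DataFrame({"abstract": [
#       "Example abstract 1: placeholder text.",
#       "Example abstract 2: placeholder text.",
#   ]}).to_csv("abstracts.csv", index=False)
# ------------------------------------------------------------------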