KaiquanMah's picture
Update retriever.py
f96da9e verified
raw
history blame
2.19 kB
# retriever tool
from langchain.tools import Tool
from langchain_community.retrievers import BM25Retriever
# load dataset
import datasets
from langchain.docstore.document import Document
# Fetch the invitee records, then wrap each one in a Document for retrieval.
guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")


def _guest_to_document(guest):
    """Build the Document for a single invitee record.

    The page content lists the record's name, relation, description and
    email on separate lines; the name is also stored in the metadata.
    """
    content_lines = [
        f"Name: {guest['name']}",
        f"Relation: {guest['relation']}",
        f"Description: {guest['description']}",
        f"Email: {guest['email']}",
    ]
    return Document(
        page_content="\n".join(content_lines),
        metadata={"name": guest["name"]},
    )


docs = [_guest_to_document(guest) for guest in guest_dataset]
###########################
# Retriever - changed to LangGraph version
###########################
# Module-level BM25 retriever built once from the guest documents (`docs`);
# `extract_text` queries it for every tool call.
bm25_retriever = BM25Retriever.from_documents(docs)
def extract_text(query: str) -> str:
    """Retrieves detailed information about gala guests based on their name or relation.

    Args:
        query: Free-text search string, e.g. a guest's name or relation.

    Returns:
        The ``page_content`` of at most three retrieved documents, separated
        by blank lines, or ``"No matching guest information found."`` when the
        query is empty/blank or the retriever returns no documents.
    """
    no_match = "No matching guest information found."

    # A blank query carries no search terms, so answer directly instead of
    # asking the retriever to rank documents against nothing.
    # NOTE(review): BM25 retrievers presumably still return k documents for a
    # term-less query; confirm against the retriever implementation.
    if not query or not query.strip():
        return no_match

    results = bm25_retriever.invoke(query)
    if not results:
        return no_match

    # Only the first three documents returned by the retriever are reported.
    return "\n\n".join(doc.page_content for doc in results[:3])
# Expose `extract_text` as a named tool; the description is the text handed
# to Tool alongside the callable.
guest_info_tool = Tool(
    func=extract_text,
    name="guest_info_retriever",
    description="Retrieves detailed information about gala guests based on their name or relation.",
)
###########################
# # (Optional) Remove or update this function if unused
# def load_guest_dataset():
# # This function is no longer needed as docs are created above
# pass
# # Original loader, kept for reference: no change was needed when moving from smolagents to LangGraph
# def load_guest_dataset():
# # Load the dataset
# guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")
# # Convert dataset entries into Document objects
# docs = [
# Document(
# page_content="\n".join([
# f"Name: {guest['name']}",
# f"Relation: {guest['relation']}",
# f"Description: {guest['description']}",
# f"Email: {guest['email']}"
# ]),
# metadata={"name": guest["name"]}
# )
# for guest in guest_dataset
# ]
# # Return the tool
# return GuestInfoRetrieverTool(docs)