Spaces:
Configuration error
Configuration error
File size: 4,514 Bytes
7e238a2 cd7d51b 7e238a2 6254bcb c76dc92 6254bcb c76dc92 cd7d51b 7e238a2 6254bcb 7e238a2 6254bcb c76dc92 cd7d51b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import os
from dotenv import load_dotenv
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.tools.retriever import create_retriever_tool
from langchain_core.tools import tool
from supabase.client import Client, create_client
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_huggingface import (
ChatHuggingFace,
HuggingFaceEndpoint,
HuggingFaceEmbeddings,
)
from langgraph.graph import START, StateGraph, MessagesState
# Pull variables from a local .env file into the process environment before
# the clients further down read their credentials via os.environ.
load_dotenv()
# NOTE: @tool uses the docstring as the tool description shown to the model,
# so the docstring is kept short; maintainer notes live in comments instead.
@tool
def wikipedia_search(query: str) -> dict[str, str]:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search string.
    """
    docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Wrap every page in a <Document> tag and separate pages with a rule so
    # the model can tell the individual sources apart.
    all_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in docs
    )
    # The payload is a dict, not a bare string; the annotation above now
    # matches what is actually returned.
    return {"wikipedia_results": all_search_docs}
# NOTE: @tool uses the docstring as the tool description shown to the model,
# so the docstring is kept short; maintainer notes live in comments instead.
@tool
def web_search(query: str) -> dict[str, str]:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.
    """
    # invoke() takes the tool input as its first positional argument; passing
    # it as `query=` leaves that argument unset and raises TypeError.
    results = TavilySearchResults(max_results=3).invoke(query)

    # NOTE(review): the Tavily tool appears to report failures by returning
    # the error text instead of raising -- pass that through unchanged rather
    # than iterating over its characters. Confirm against the installed version.
    if isinstance(results, str):
        return {"web_results": results}

    # Tavily hits are plain dicts ("url", "content", ...), not Document
    # objects, so they have no .metadata / .page_content attributes.
    all_search_docs = "\n\n---\n\n".join(
        f'<Document source="{hit.get("url", "")}" page=""/>\n{hit.get("content", "")}\n</Document>'
        for hit in results
    )
    return {"web_results": all_search_docs}
# NOTE: @tool uses the docstring as the tool description shown to the model,
# so the docstring is kept short; maintainer notes live in comments instead.
@tool
def arvix_search(query: str) -> dict[str, str]:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    def _source_of(metadata: dict) -> str:
        # NOTE(review): ArxivLoader's default metadata seems to carry
        # Title/Published/Authors/Summary but no "source" key, so a hard
        # metadata["source"] lookup raises KeyError. Prefer "source" when it
        # exists, then the entry id, then the paper title.
        return metadata.get(
            "source", metadata.get("entry_id", metadata.get("Title", ""))
        )

    # Only the first 1000 characters of each paper are kept to bound the size
    # of the tool output.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{_source_of(doc.metadata)}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
    return {"arvix_results": formatted_search_docs}
# Read the agent's system prompt from disk once, at import time, and wrap it
# in a SystemMessage so graph nodes can prepend it to a conversation.
with open("system_prompt.txt", mode="r", encoding="utf-8") as prompt_file:
    system_prompt = prompt_file.read()

sys_msg = SystemMessage(content=system_prompt)
# Supabase client configured from the SUPABASE_URL / SUPABASE_SERVICE_KEY
# environment variables (either may be None if the variable is unset).
supabase: Client = create_client(
    os.getenv("SUPABASE_URL"),
    os.getenv("SUPABASE_SERVICE_KEY"),
)

# Vector store over the "search_documents" table; queries are embedded with
# the all-mpnet-base-v2 sentence-transformers model and matched through the
# "langchain_match_documents" query.
supabase_store = SupabaseVectorStore(
    table_name="search_documents",
    query_name="langchain_match_documents",
    embedding=HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    ),
    client=supabase,
)
# Expose the vector store to the model as a tool that returns the five most
# similar stored questions.
retriever_tool = create_retriever_tool(
    supabase_store.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
    name="question_search",
    description="A tool to retrieve similar questions from a vector store.",
)

# Every tool the assistant may call; the same list is bound to the LLM and
# handed to the graph's tool node.
tools = [wikipedia_search, web_search, arvix_search, retriever_tool]
def build_graph():
    """Assemble and compile the agent graph.

    Flow: START -> retriever -> assistant, then assistant <-> tools for as
    long as ``tools_condition`` routes the assistant's reply to the tool node.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    endpoint = HuggingFaceEndpoint(repo_id="Qwen/Qwen2.5-Coder-32B-Instruct")
    chat_model = ChatHuggingFace(llm=endpoint)
    tool_aware_model = chat_model.bind_tools(tools)

    def assistant(state: MessagesState):
        """Run the tool-aware chat model on the conversation so far."""
        reply = tool_aware_model.invoke(state["messages"])
        return {"messages": [reply]}

    def retriever(state: MessagesState):
        """Prepend the system prompt and, when available, a similar stored Q&A."""
        history = state["messages"]
        # The first message is used as the text to look up similar questions.
        matches = supabase_store.similarity_search(history[0].content)
        print("Similar questions:")
        print(matches)

        # NOTE(review): MessagesState merges returned messages through its
        # reducer, so confirm the system message really ends up first in the
        # prompt the assistant receives.
        if not matches:
            return {"messages": [sys_msg] + history}

        example_msg = HumanMessage(
            content=f"Here I provide a similar question and answer for reference: \n\n{matches[0].page_content}",
        )
        return {"messages": [sys_msg] + history + [example_msg]}

    builder = StateGraph(MessagesState)

    # Nodes.
    builder.add_node("retriever", retriever)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    # Edges: retrieval runs once up front; tool results always flow back to
    # the assistant.
    builder.add_edge(START, "retriever")
    builder.add_edge("retriever", "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    return builder.compile()
|