Upload 5 files
#252
by
mohamed2003IX
- opened
- main.py +70 -0
- models.py +0 -0
- prompts.py +73 -0
- state.py +25 -0
- tools.py +48 -0
main.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langgraph.graph import StateGraph, END
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, BaseMessage
from prompts import main_prompt, research_agent_prompt # make sure analyzer_prompt is defined
from tools import search
from state import search_keys, AgentState
import sys
# Force UTF-8 stdout so non-ASCII (e.g. Arabic) model output prints cleanly
# on consoles with a different default encoding.
sys.stdout.reconfigure(encoding="utf-8")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# -------- Models -------- #
import os  # keys must come from the environment, never from source code

# SECURITY: API keys were previously hardcoded here and are therefore leaked —
# rotate them on OpenRouter and provide replacements via environment variables.

# Produces a structured search_keys(query, Topic) object from the user message.
boss_model = ChatOpenAI(
    model="meta-llama/llama-4-maverick:free",
    openai_api_key=os.environ.get("OPENROUTER_BOSS_API_KEY", ""),
    base_url="https://openrouter.ai/api/v1",
    temperature=0.5,
    max_tokens=1024,
    top_p=0.5,
).with_structured_output(search_keys)

# Summarizes raw search results into a natural-language answer.
analyzer_model = ChatOpenAI(
    model="openrouter/sonoma-sky-alpha",
    openai_api_key=os.environ.get("OPENROUTER_ANALYZER_API_KEY", ""),
    base_url="https://openrouter.ai/api/v1",
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# -------- Nodes -------- #
|
| 29 |
+
def boss_node(state: AgentState) -> AgentState:
    """Turn the latest user message into a search query and run the search.

    Reads the last entry of state["messages"], asks boss_model for a
    structured search_keys object, performs the web search, and stores the
    raw results in state["search_content"].
    """
    messages = state.get("messages")
    if not messages:
        raise ValueError("No messages found in state. Please provide at least one HumanMessage.")

    latest: BaseMessage = messages[-1]
    # Fall back to str() for message-like objects without a .content attribute.
    user_text = getattr(latest, "content", str(latest))

    keys = boss_model.invoke(f"{main_prompt}\nUser: {user_text}")
    state["search_content"] = search(keys.query, keys.Topic)
    return state
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def analyzer_node(state: AgentState) -> AgentState:
    """Summarize the raw search content into the final answer message."""
    prompt = f"{research_agent_prompt}\n{state['search_content']}"
    state["search_results"] = analyzer_model.invoke(prompt)
    return state
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# -------- Graph -------- #
# Linear two-stage pipeline: boss (generate query + run web search) feeds
# analyzer (summarize results), which ends the run.
graph = StateGraph(AgentState)
graph.add_node("boss", boss_node)
graph.add_node("analyzer", analyzer_node)
graph.add_edge("boss", "analyzer")
graph.add_edge("analyzer", END)
graph.set_entry_point("boss")
app = graph.compile()
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# -------- Run Tests -------- #
if __name__ == "__main__":
    # Smoke-test streaming: print the analyzer's answer as soon as it arrives.
    # (The previous version printed only the ":: answer is -->" header and
    # never the answer itself.)
    for event in app.stream({"messages": [HumanMessage(content="what is best player in football in all time ")]}):
        if "analyzer" in event:
            print(":: answer is -->")
            # event["analyzer"] is the state dict returned by analyzer_node.
            print(event["analyzer"]["search_results"].content)

    # Smoke-test a single end-to-end invoke call.
    result = app.invoke(
        {"messages": [HumanMessage(content="what is capital of egypt")]}
    )
    print(result["search_results"].content)
|
models.py
ADDED
|
File without changes
|
prompts.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
# Prompt for the query-generator ("boss") model. The Topic rules MUST match
# search_keys.Topic, which is Literal["general", "news", "finance"] — the only
# topics the Tavily search backend accepts. (Earlier wording asked for free-form
# Title Case labels, which broke both structured output and the search call.)
main_prompt = """SYSTEM / PROMPT for the model:

You are a Search Query Generator. Your job: read a user's question, determine the user's intent precisely, and produce a single JSON object that exactly matches this Pydantic structure:

class search_keys(BaseModel):
    query: str = Field(description='this is the query that the user want to search')
    Topic: Literal["general", "news", "finance"] = Field(description='this is the topic that the user want to search about')

OUTPUT RULES (must follow exactly):
1. Return ONLY a single valid JSON object and nothing else (no explanation, no extra text, no code fences).
2. JSON must have two keys: "query" (lowercase) and "Topic" (capital T).
3. "query" must be a concise, search-engine-optimized query (3–12 words is ideal). Include important entities, language, format or operators (e.g., site:, filetype:, exact-phrase quotes) when helpful.
4. "Topic" must be EXACTLY one of: "general", "news", "finance" — these are the only values the search backend accepts. Use "news" for current events, "finance" for markets/economy/prices, and "general" for everything else.
5. If the user's question is ambiguous, pick the most likely interpretation and default "Topic" to "general".

INTERNAL PROCESS (do NOT output these internal steps; use them to guide generation):
- Step 1: Identify main intent (what answer the user expects: name, tutorial, product, comparison, code snippet, font name, etc.).
- Step 2: Detect language and location/time constraints in the question (e.g., Arabic, "in Canva", "latest 2024", etc.).
- Step 3: Extract specific entities/keywords and preferred format (e.g., "Canva", "Thuluth", "font name", "PNG", "example code").
- Step 4: Compose a short, high-precision search query tuned for web search.
- Step 5: Choose the Topic strictly from "general", "news", "finance".

EXAMPLE USAGE:
User input: "اسم للخط الثلث في كانفا بس انجليزي ابحث"
-> Output JSON: {"query":"Canva Thuluth font name", "Topic":"general"}

User input: "How to convert a pandas DataFrame to partitioned parquet?"
-> Output JSON: {"query":"convert pandas DataFrame to partitioned parquet example", "Topic":"general"}

User input: "latest inflation rates in the US"
-> Output JSON: {"query":"latest US inflation rate CPI report", "Topic":"finance"}

"""
|
| 36 |
+
|
| 37 |
+
research_agent_prompt = """
|
| 38 |
+
You are SearchAnalysisAgent.
|
| 39 |
+
You receive a "query" string and a "topic".
|
| 40 |
+
You must use the search tool to gather results, but the topic must always be one of: "general", "news", or "finance".
|
| 41 |
+
|
| 42 |
+
TOOLS:
|
| 43 |
+
- search(query: str, topic: Literal["general", "news", "finance"]) → returns top web results.
|
| 44 |
+
|
| 45 |
+
INSTRUCTIONS:
|
| 46 |
+
1. Ensure the topic is strictly one of: "general", "news", "finance". If the query does not clearly fit "news" or "finance", default to "general".
|
| 47 |
+
2. Call the search tool with the given query and topic.
|
| 48 |
+
3. Carefully read the results.
|
| 49 |
+
4. Extract the most relevant facts that directly answer the query.
|
| 50 |
+
5. Summarize them into a clear, concise answer.
|
| 51 |
+
6. Do not include raw links, metadata, or irrelevant text.
|
| 52 |
+
7. Always respond in the language of the original query, if obvious.
|
| 53 |
+
8. Do not output JSON or code — only natural language.
|
| 54 |
+
|
| 55 |
+
FORMAT OF OUTPUT:
|
| 56 |
+
- A single summarized answer in plain text.
|
| 57 |
+
- If there are multiple possibilities, list them briefly as alternatives.
|
| 58 |
+
|
| 59 |
+
EXAMPLE:
|
| 60 |
+
|
| 61 |
+
Query: "Canva Thuluth font English name" (topic: general)
|
| 62 |
+
→ (search results)
|
| 63 |
+
→ Answer: "The closest Canva fonts to Arabic Thuluth are *Aref Ruqaa Ink*, *Amiri*, and *Scheherazade New*."
|
| 64 |
+
|
| 65 |
+
Query: "convert pandas DataFrame to partitioned parquet example" (topic: general)
|
| 66 |
+
→ (search results)
|
| 67 |
+
→ Answer: "You can save partitioned Parquet files in Pandas using:
|
| 68 |
+
`df.to_parquet('output_dir', partition_cols=['col'])`."
|
| 69 |
+
|
| 70 |
+
Query: "latest inflation rates in the US" (topic: finance)
|
| 71 |
+
→ (search results)
|
| 72 |
+
→ Answer: "US inflation in August 2025 was 2.7%, slightly higher than July's 2.5%."
|
| 73 |
+
"""
|
state.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import Field  # NOTE(review): shadowed by pydantic's Field below — appears unused; confirm before removing
from langgraph.pregel.main import Topic  # NOTE(review): Topic is never referenced in this module — likely a stray auto-import; verify it even resolves
from typing_extensions import Annotated,Sequence,List,TypedDict
from pydantic import BaseModel ,Field
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing import Literal
|
| 8 |
+
class AgentState(TypedDict):
    """
    Represents the state of the agent.

    Attributes:
        messages: The conversation so far; add_messages appends new messages
            instead of replacing the list.
        search_results: The final answer message produced by the analyzer
            model (callers read its .content attribute).
        search_content: Raw content snippets returned by the search tool.
        query: The generated search query string.
    """
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # The analyzer node stores the model's response message here and main.py
    # reads .content from it, so the correct type is a message — not List[str].
    search_results: BaseMessage
    # tools.search returns a list of content snippets, not a single string.
    search_content: List[str]
    query: str
|
| 19 |
+
|
| 20 |
+
# Structured-output schema for the query-generator ("boss") model.
class search_keys(BaseModel):
    # Search-engine-optimized query string derived from the user message.
    query:str = Field(description='this is the query that the user want to search')
    # Restricted to the three topic values the Tavily search backend accepts.
    Topic:Literal["general", "news", "finance"] = Field(description='this is the topic that the user want to search about')
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
tools.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Optional
from tavily import TavilyClient
from langchain_core.tools import tool
import time
import os


# SECURITY: the Tavily API key was previously hardcoded here and is therefore
# leaked — rotate it and supply the replacement via the TAVILY_API_KEY
# environment variable.
client = TavilyClient(os.environ.get("TAVILY_API_KEY", ""))
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def search(query: str, topic: Optional[str] = None, max_results: int = 5) -> List[str]:
    """
    Perform a Tavily search and return only the extracted content from results.

    Args:
        query (str): The search query string.
        topic (Optional[str]): Topic to refine the search. Tavily only accepts
            "general", "news" or "finance"; anything else (including None) is
            coerced to "general" instead of being passed through and rejected.
        max_results (int): Maximum number of search results to retrieve.

    Returns:
        List[str]: A list of content snippets extracted from the search
        results, or a single-element list describing the failure.
    """
    # Normalize the topic rather than letting the Tavily API reject the call.
    if topic not in ("general", "news", "finance"):
        topic = "general"

    try:
        response = client.search(
            query=query,
            topic=topic,
            max_results=max_results,
            search_depth="advanced",
        )

        # Extract only non-empty "content" fields safely.
        contents = [
            result.get("content", "").strip()
            for result in response.get("results", [])
            if result.get("content")
        ]

        return contents if contents else ["No content found."]

    except Exception as e:
        # Best-effort tool: report the failure as data instead of raising so
        # the calling graph node still receives a usable list.
        return [f"Search failed: {str(e)}"]
|
| 41 |
+
|
| 42 |
+
def get_time():
    """Return the current Unix timestamp (seconds since the epoch) as a float."""
    return time.time()


# Guard the debug print so importing this module has no side effects.
if __name__ == "__main__":
    print(get_time())
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|