mohamed2003IX commited on
Commit
0fd7e61
·
verified ·
1 Parent(s): 81917a3

Upload 5 files

Browse files
Files changed (5) hide show
  1. main.py +70 -0
  2. models.py +0 -0
  3. prompts.py +73 -0
  4. state.py +25 -0
  5. tools.py +48 -0
main.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, END
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_core.messages import HumanMessage, BaseMessage
4
+ from prompts import main_prompt, research_agent_prompt # تأكد إن analyzer_prompt متعرف
5
+ from tools import search
6
+ from state import search_keys, AgentState
7
+ import sys
8
+ sys.stdout.reconfigure(encoding="utf-8")
9
+
10
+
11
# -------- Models -------- #
import os

# NOTE(security): API keys were hard-coded here and are now committed to
# version control — rotate them. They are kept only as backward-compatible
# fallbacks; prefer setting the environment variables below.
boss_model = ChatOpenAI(
    model="meta-llama/llama-4-maverick:free",
    openai_api_key=os.environ.get(
        "OPENROUTER_BOSS_API_KEY",
        "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",
    ),
    base_url="https://openrouter.ai/api/v1",
    temperature=0.5,
    max_tokens=1024,
    top_p=0.5,
).with_structured_output(search_keys)  # invoke() returns a search_keys instance

analyzer_model = ChatOpenAI(
    model="openrouter/sonoma-sky-alpha",
    openai_api_key=os.environ.get(
        "OPENROUTER_ANALYZER_API_KEY",
        "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",
    ),
    base_url="https://openrouter.ai/api/v1",
)
26
+
27
+
28
# -------- Nodes -------- #
def boss_node(state: AgentState) -> AgentState:
    """Turn the latest user message into a structured search and run it.

    Asks the structured-output model for a (query, Topic) pair, executes the
    search tool with it, and stores the snippets in ``state["search_content"]``.

    Raises:
        ValueError: If the state carries no messages at all.
    """
    messages = state.get("messages")
    if not messages:
        raise ValueError("No messages found in state. Please provide at least one HumanMessage.")

    latest: BaseMessage = messages[-1]
    # Fall back to str() for anything that is not a proper message object.
    user_text = getattr(latest, "content", str(latest))

    keys = boss_model.invoke(f"{main_prompt}\nUser: {user_text}")
    state["search_content"] = search(keys.query, keys.Topic)
    return state
40
+
41
+
42
def analyzer_node(state: AgentState) -> AgentState:
    """Summarize the collected search snippets into a final answer message."""
    prompt = f"{research_agent_prompt}\n{state['search_content']}"
    response = analyzer_model.invoke(prompt)
    state["search_results"] = response
    return state
47
+
48
+
49
# -------- Graph -------- #
# Linear two-stage pipeline: boss (query generation + search) -> analyzer
# (summarization) -> END.
graph = StateGraph(AgentState)
graph.add_node("boss", boss_node)
graph.add_node("analyzer", analyzer_node)
graph.set_entry_point("boss")
graph.add_edge("boss", "analyzer")
graph.add_edge("analyzer", END)
app = graph.compile()
57
+
58
+
59
# -------- Run Tests -------- #
if __name__ == "__main__":
    # Smoke-test streaming: emit the analyzer's answer as soon as its node
    # finishes. (The original comment claimed "Chinese input" — the query is
    # English — and the loop printed only a marker, never the answer itself.)
    for event in app.stream({"messages": [HumanMessage(content="what is best player in football in all time ")]}):
        if "analyzer" in event:
            print(":: answer is -->")
            print(event["analyzer"]["search_results"].content)

    # Smoke-test a single synchronous invocation.
    result = app.invoke(
        {"messages": [HumanMessage(content="what is capital of egypt")]}
    )
    print(result["search_results"].content)
models.py ADDED
File without changes
prompts.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the "boss" (query-generator) model. It forces the model to
# emit a single JSON object matching the `search_keys` schema in state.py
# (keys: "query" lowercase, "Topic" capital T).
main_prompt = """SYSTEM / PROMPT for the model:

You are a Search Query Generator. Your job: read a user's question, determine the user's intent precisely, and produce a single JSON object that exactly matches this Pydantic structure:

class search_keys(BaseModel):
    query: str = Field(description='this is the query that the user want to search')
    Topic: str = Field(description='this is the topic that the user want to search about')

OUTPUT RULES (must follow exactly):
1. Return ONLY a single valid JSON object and nothing else (no explanation, no extra text, no code fences).
2. JSON must have two keys: "query" (lowercase) and "Topic" (capital T).
3. "query" must be a concise, search-engine-optimized query (3–12 words is ideal). Include important entities, language, format or operators (e.g., site:, filetype:, exact-phrase quotes) when helpful.
4. "Topic" must be a short label (2–4 words, Title Case) that captures the user's high-level topic or intent.
5. If the user’s question is ambiguous, pick the most likely interpretation; append " (ambiguous)" to the Topic to signal uncertainty.

INTERNAL PROCESS (do NOT output these internal steps; use them to guide generation):
- Step 1: Identify main intent (what answer the user expects: name, tutorial, product, comparison, code snippet, font name, etc.).
- Step 2: Detect language and location/time constraints in the question (e.g., Arabic, "in Canva", "latest 2024", etc.).
- Step 3: Extract specific entities/keywords and preferred format (e.g., "Canva", "Thuluth", "font name", "PNG", "example code").
- Step 4: Compose a short, high-precision search query tuned for web search.
- Step 5: Generate a concise Topic label.

EXAMPLE USAGE:
User input: "اسم للخط الثلث في كانفا بس انجليزي ابحث"
-> Output JSON: {"query":"Canva Thuluth font name", "Topic":"Arabic Fonts"}

User input: "How to convert a pandas DataFrame to partitioned parquet?"
-> Output JSON: {"query":"convert pandas DataFrame to partitioned parquet example", "Topic":"Pandas - Storage"}

User input (ambiguous): "apple battery life"
-> Output JSON: {"query":"iPhone battery life iOS battery drain tests", "Topic":"Apple Battery (ambiguous)"}

"""
36
# System prompt for the analyzer model: condenses search-tool results into a
# plain-language answer. Topic is constrained to the same Literal values as
# search_keys.Topic ("general" / "news" / "finance").
research_agent_prompt = """
You are SearchAnalysisAgent.
You receive a "query" string and a "topic".
You must use the search tool to gather results, but the topic must always be one of: "general", "news", or "finance".

TOOLS:
- search(query: str, topic: Literal["general", "news", "finance"]) → returns top web results.

INSTRUCTIONS:
1. Ensure the topic is strictly one of: "general", "news", "finance". If the query does not clearly fit "news" or "finance", default to "general".
2. Call the search tool with the given query and topic.
3. Carefully read the results.
4. Extract the most relevant facts that directly answer the query.
5. Summarize them into a clear, concise answer.
6. Do not include raw links, metadata, or irrelevant text.
7. Always respond in the language of the original query, if obvious.
8. Do not output JSON or code — only natural language.

FORMAT OF OUTPUT:
- A single summarized answer in plain text.
- If there are multiple possibilities, list them briefly as alternatives.

EXAMPLE:

Query: "Canva Thuluth font English name" (topic: general)
→ (search results)
→ Answer: "The closest Canva fonts to Arabic Thuluth are *Aref Ruqaa Ink*, *Amiri*, and *Scheherazade New*."

Query: "convert pandas DataFrame to partitioned parquet example" (topic: general)
→ (search results)
→ Answer: "You can save partitioned Parquet files in Pandas using:
`df.to_parquet('output_dir', partition_cols=['col'])`."

Query: "latest inflation rates in the US" (topic: finance)
→ (search results)
→ Answer: "US inflation in August 2025 was 2.7%, slightly higher than July's 2.5%."
"""
state.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import Field
2
+ from langgraph.pregel.main import Topic
3
+ from typing_extensions import Annotated,Sequence,List,TypedDict
4
+ from pydantic import BaseModel ,Field
5
+ from langchain_core.messages import BaseMessage
6
+ from langgraph.graph.message import add_messages
7
+ from typing import Literal
8
class AgentState(TypedDict):
    """State shared across the LangGraph pipeline.

    Attributes:
        messages: Conversation history; the ``add_messages`` reducer appends
            new messages instead of replacing the list.
        search_results: The analyzer model's response message (callers read
            ``.content`` from it).
        search_content: Content snippets returned by the search tool.
        query: The generated search query string.
    """
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Fixed annotation: analyzer_node stores the chat model's response message
    # here (main.py accesses `.content`), not a list of strings.
    search_results: BaseMessage
    # Fixed annotation: boss_node assigns the List[str] returned by search().
    search_content: List[str]
    query: str
19
+
20
class search_keys(BaseModel):
    """Structured output schema for the query-generator ("boss") model."""

    # Key casing is intentional ("query" lowercase, "Topic" capitalized):
    # the main prompt instructs the model to emit exactly these JSON keys.
    query: str = Field(
        description='this is the query that the user want to search'
    )
    Topic: Literal["general", "news", "finance"] = Field(
        description='this is the topic that the user want to search about'
    )
23
+
24
+
25
+
tools.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+ from tavily import TavilyClient
3
+ from langchain_core.tools import tool
4
+ import time
5
+
6
+
7
import os

# NOTE(security): the Tavily API key was committed in plain text — rotate it.
# Read from the environment first; the hard-coded value remains only as a
# backward-compatible fallback.
client = TavilyClient(os.environ.get("TAVILY_API_KEY", "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"))
8
+
9
+
10
def search(query: str, topic: Optional[str] = None, max_results: int = 5) -> List[str]:
    """
    Run a Tavily web search and return only the textual content of each hit.

    Args:
        query (str): The search query string.
        topic (Optional[str]): (Optional) Topic/domain to refine the search.
        max_results (int): Maximum number of search results to retrieve.

    Returns:
        List[str]: Stripped content snippets; a single "No content found."
        placeholder when nothing usable came back; or a single
        "Search failed: ..." message if anything raised.
    """
    try:
        raw = client.search(
            query=query,
            topic=topic,
            max_results=max_results,
            search_depth="advanced",
        )

        # Collect only non-empty "content" fields from the result set.
        snippets: List[str] = []
        for hit in raw.get("results", []):
            text = hit.get("content")
            if text:
                snippets.append(text.strip())

        return snippets if snippets else ["No content found."]

    except Exception as exc:
        # Best-effort tool: surface the failure as data instead of raising.
        return [f"Search failed: {str(exc)}"]
41
+
42
def get_time() -> float:
    """Return the current Unix timestamp in seconds (wraps `time.time`)."""
    return time.time()

# Removed the module-level `print(get_time())`: it was a leftover debug
# statement that wrote to stdout on every import of this module.
46
+
47
+
48
+