ana-35 committed on
Commit
64188e4
·
1 Parent(s): 5c082e6

improved tools

Browse files
Files changed (3) hide show
  1. app.py +17 -27
  2. tools/file_reader.py +22 -23
  3. tools/web_searcher.py +24 -28
app.py CHANGED
@@ -3,13 +3,13 @@ import gradio as gr
3
  import requests
4
  import re
5
  import pandas as pd
6
- from io import BytesIO
7
- from dotenv import load_dotenv
8
  from openai import OpenAI
9
  from tools.web_searcher import web_search_tool
10
  from tools.calculator import calculator_tool
11
  from tools.file_reader import read_file_tool
12
 
 
 
13
  load_dotenv()
14
 
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -20,42 +20,32 @@ class ToolUsingAgent:
20
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
21
  self.model = "gpt-4o"
22
  self.temperature = 0.0
23
- self.max_tokens = 1000
24
  self.web_search = web_search_tool
25
  self.calculator = calculator_tool
26
  self.file_reader = read_file_tool
27
 
28
  def answer(self, question: str, task_id: str = None) -> str:
29
  try:
30
- # Step 1: Collect tool results if applicable
31
- file_result = None
32
  if task_id:
33
- file_result = self.file_reader.run(task_id)
34
 
35
- web_result = None
36
  if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
37
- web_result = self.web_search.run(question)
38
 
39
- calc_result = None
40
  if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
41
- calc_result = self.calculator.run(question)
42
-
43
- # Step 2: Build tool context dynamically
44
- tool_context = ""
45
- if file_result:
46
- tool_context += f"\nFile content: {file_result}"
47
- if web_result:
48
- tool_context += f"\nWeb search result: {web_result}"
49
- if calc_result:
50
- tool_context += f"\nCalculation result: {calc_result}"
51
-
52
- # Step 3: Generate the prompt for LLM
53
  prompt = f"""
54
- You are an expert AI agent solving complex questions. Follow these strict rules:
55
- 1. Use the tools when necessary: web_search, file_reader, calculator.
56
- 2. Combine tools if required for multi-step questions.
57
- 3. Return only the requested format: single name, number, city, or code. No explanations, no extra text.
58
- 4. If you cannot answer, reply exactly: 'No answer found.'
59
  5. Do not hallucinate or guess.
60
 
61
  Question: {question}
@@ -73,7 +63,7 @@ Answer:"""
73
  def query_llm(self, prompt: str) -> str:
74
  completion = self.client.chat.completions.create(
75
  model=self.model,
76
- messages=[{"role": "user", "content": prompt}],
77
  temperature=self.temperature,
78
  max_tokens=self.max_tokens
79
  )
 
3
  import requests
4
  import re
5
  import pandas as pd
 
 
6
  from openai import OpenAI
7
  from tools.web_searcher import web_search_tool
8
  from tools.calculator import calculator_tool
9
  from tools.file_reader import read_file_tool
10
 
11
+ from dotenv import load_dotenv
12
+
13
  load_dotenv()
14
 
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
20
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
21
  self.model = "gpt-4o"
22
  self.temperature = 0.0
23
+ self.max_tokens = 800
24
  self.web_search = web_search_tool
25
  self.calculator = calculator_tool
26
  self.file_reader = read_file_tool
27
 
28
  def answer(self, question: str, task_id: str = None) -> str:
29
  try:
30
+ tool_results = []
31
+
32
  if task_id:
33
+ tool_results.append(("File", self.file_reader.run(task_id)))
34
 
 
35
  if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
36
+ tool_results.append(("Web", self.web_search.run(question)))
37
 
 
38
  if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
39
+ tool_results.append(("Calc", self.calculator.run(question)))
40
+
41
+ tool_context = "\n".join([f"{name} result: {result}" for name, result in tool_results if result])
42
+
 
 
 
 
 
 
 
 
43
  prompt = f"""
44
+ You are an advanced AI agent answering complex questions. Follow these strict rules:
45
+ 1. Always use the available tools for factual and verified answers.
46
+ 2. Combine multiple tools when necessary.
47
+ 3. Return the answer in the requested format only (e.g., single name, city, number, or code). No explanations.
48
+ 4. If you cannot answer, say exactly: 'No answer found.'
49
  5. Do not hallucinate or guess.
50
 
51
  Question: {question}
 
63
  def query_llm(self, prompt: str) -> str:
64
  completion = self.client.chat.completions.create(
65
  model=self.model,
66
+ messages=[{"role": "user", "content": prompt.strip()}],
67
  temperature=self.temperature,
68
  max_tokens=self.max_tokens
69
  )
tools/file_reader.py CHANGED
@@ -1,55 +1,54 @@
1
  import requests
2
  import pandas as pd
 
3
  import PyPDF2
4
- import json
5
- from io import BytesIO
6
  from langchain.tools import Tool
7
 
8
  def read_file(task_id: str) -> str:
 
 
 
 
9
  try:
10
  url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
11
- response = requests.get(url)
12
  response.raise_for_status()
13
 
14
  content_type = response.headers.get("Content-Type", "").lower()
15
- file_bytes = BytesIO(response.content)
16
 
17
- if "excel" in content_type or task_id.endswith((".xls", ".xlsx")):
18
  df = pd.read_excel(file_bytes)
19
- return summarize_dataframe(df)
 
 
 
 
20
 
21
  elif "csv" in content_type or task_id.endswith(".csv"):
22
  df = pd.read_csv(file_bytes)
23
- return summarize_dataframe(df)
 
 
 
 
24
 
25
  elif "pdf" in content_type or task_id.endswith(".pdf"):
26
  pdf = PyPDF2.PdfReader(file_bytes)
27
  text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
28
  return text.strip() if text else "No text extracted from PDF."
29
 
30
- elif "json" in content_type or task_id.endswith(".json"):
31
- data = json.load(file_bytes)
32
- return json.dumps(data, indent=2)
33
-
34
- elif any(task_id.endswith(ext) for ext in [".txt", ".py", ".md"]):
35
- return response.content.decode("utf-8", errors="ignore")
36
 
37
  else:
38
- return response.content.decode("utf-8", errors="ignore")
39
 
40
  except Exception as e:
41
  return f"[File reader error: {e}]"
42
 
43
- def summarize_dataframe(df: pd.DataFrame) -> str:
44
- numeric_cols = df.select_dtypes(include='number').columns
45
- if not numeric_cols.empty:
46
- total = df[numeric_cols].sum().sum()
47
- return f"Total of numeric columns: {total:.2f}"
48
- else:
49
- return df.head().to_string(index=False)
50
-
51
  read_file_tool = Tool.from_function(
52
  name="read_file",
53
- description="Reads the content of a file based on the task_id",
54
  func=read_file
55
  )
 
1
  import requests
2
  import pandas as pd
3
+ import io
4
  import PyPDF2
 
 
5
  from langchain.tools import Tool
6
 
7
  def read_file(task_id: str) -> str:
8
+ """
9
+ Downloads and reads the content of a file by task_id from the evaluation server.
10
+ Handles Excel, CSV, PDF, TXT, JSON, Python, Markdown, and generic files.
11
+ """
12
  try:
13
  url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
14
+ response = requests.get(url, timeout=15)
15
  response.raise_for_status()
16
 
17
  content_type = response.headers.get("Content-Type", "").lower()
18
+ file_bytes = io.BytesIO(response.content)
19
 
20
+ if "excel" in content_type or task_id.endswith((".xlsx", ".xls")):
21
  df = pd.read_excel(file_bytes)
22
+ numeric_cols = df.select_dtypes(include='number').columns
23
+ if not numeric_cols.empty:
24
+ total = df[numeric_cols].sum().sum()
25
+ return f"{total:.2f}"
26
+ return df.to_string(index=False, header=True)
27
 
28
  elif "csv" in content_type or task_id.endswith(".csv"):
29
  df = pd.read_csv(file_bytes)
30
+ numeric_cols = df.select_dtypes(include='number').columns
31
+ if not numeric_cols.empty:
32
+ total = df[numeric_cols].sum().sum()
33
+ return f"{total:.2f}"
34
+ return df.to_string(index=False, header=True)
35
 
36
  elif "pdf" in content_type or task_id.endswith(".pdf"):
37
  pdf = PyPDF2.PdfReader(file_bytes)
38
  text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
39
  return text.strip() if text else "No text extracted from PDF."
40
 
41
+ elif any(task_id.endswith(ext) for ext in [".txt", ".py", ".md", ".json"]):
42
+ return response.content.decode('utf-8', errors='ignore').strip()
 
 
 
 
43
 
44
  else:
45
+ return response.content.decode('utf-8', errors='ignore').strip()
46
 
47
  except Exception as e:
48
  return f"[File reader error: {e}]"
49
 
 
 
 
 
 
 
 
 
50
# LangChain Tool wrapper so the agent can invoke read_file by name.
read_file_tool = Tool.from_function(
    name="read_file",
    description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
    func=read_file
)
tools/web_searcher.py CHANGED
@@ -1,48 +1,44 @@
1
  import requests
2
  import os
3
- from dotenv import load_dotenv
4
  from langchain.tools import Tool
 
5
 
6
  load_dotenv()
7
- serper_api_key = os.getenv("SERPER_API_KEY")
8
- serper_api_url = "https://google.serper.dev/search"
 
9
 
10
  def web_search(query: str) -> str:
11
- if not serper_api_key:
12
- return "Error: SERPER_API_KEY is not set."
13
 
14
- headers = {
15
- "X-API-KEY": serper_api_key,
16
- "Content-Type": "application/json"
17
- }
18
 
19
- payload = { "q": query }
20
  try:
21
- response = requests.post(serper_api_url, headers=headers, json=payload, timeout=15)
22
  response.raise_for_status()
23
  data = response.json()
24
 
 
 
 
25
  results = []
26
- if data.get("organic"):
27
- for entry in data["organic"]:
28
- snippet = entry.get("snippet")
29
- link = entry.get("link")
30
- if snippet and link:
31
- results.append(f"{snippet} (Source: {link})")
32
- elif snippet:
33
- results.append(snippet)
34
-
35
- if results:
36
- return "\n".join(results[:5])
37
- else:
38
- return "No search results found."
39
-
40
- except Exception as e:
41
- return f"[Web search error: {e}]"
42
 
43
  web_search_tool = Tool.from_function(
44
  name="web_search",
45
- description="Searches the web using Serper API and provides relevant information.",
46
  func=web_search
47
  )
48
 
 
1
  import requests
2
  import os
 
3
  from langchain.tools import Tool
4
+ from dotenv import load_dotenv
5
 
6
  load_dotenv()
7
+
8
+ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
9
+ SERPER_API_URL = "https://google.serper.dev/search"
10
 
11
def web_search(query: str) -> str:
    """
    Search the web via the Serper (Google Search) API.

    Args:
        query: Free-text search query.

    Returns:
        Up to three result snippets joined by newlines, or a bracketed
        error / "no results" message. Never raises.
    """
    if not SERPER_API_KEY:
        return "[Error: SERPER_API_KEY is not set]"

    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
    payload = {"q": query, "num": 3}

    try:
        response = requests.post(SERPER_API_URL, headers=headers, json=payload, timeout=10)
        response.raise_for_status()
        data = response.json()

        if not data.get("organic"):
            return "No relevant results found."

        # Keep only entries that actually carry a snippet.
        results = [item["snippet"] for item in data["organic"] if item.get("snippet")]

        return "\n".join(results[:3]) if results else "No snippet found."

    except requests.exceptions.Timeout:
        return "[Error: Request timed out.]"
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError also covers a non-JSON response body raised by
        # response.json(), which is not a RequestException subclass.
        return f"[Error: {e}]"
 
 
 
 
 
38
 
39
# LangChain Tool wrapper so the agent can invoke web_search by name.
web_search_tool = Tool.from_function(
    name="web_search",
    description="Search the web for information using Google Search.",
    func=web_search
)
44