Spaces:
Sleeping
Sleeping
improved tools
Browse files- app.py +17 -27
- tools/file_reader.py +22 -23
- tools/web_searcher.py +24 -28
app.py
CHANGED
|
@@ -3,13 +3,13 @@ import gradio as gr
|
|
| 3 |
import requests
|
| 4 |
import re
|
| 5 |
import pandas as pd
|
| 6 |
-
from io import BytesIO
|
| 7 |
-
from dotenv import load_dotenv
|
| 8 |
from openai import OpenAI
|
| 9 |
from tools.web_searcher import web_search_tool
|
| 10 |
from tools.calculator import calculator_tool
|
| 11 |
from tools.file_reader import read_file_tool
|
| 12 |
|
|
|
|
|
|
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
@@ -20,42 +20,32 @@ class ToolUsingAgent:
|
|
| 20 |
self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 21 |
self.model = "gpt-4o"
|
| 22 |
self.temperature = 0.0
|
| 23 |
-
self.max_tokens =
|
| 24 |
self.web_search = web_search_tool
|
| 25 |
self.calculator = calculator_tool
|
| 26 |
self.file_reader = read_file_tool
|
| 27 |
|
| 28 |
def answer(self, question: str, task_id: str = None) -> str:
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
if task_id:
|
| 33 |
-
|
| 34 |
|
| 35 |
-
web_result = None
|
| 36 |
if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
|
| 37 |
-
|
| 38 |
|
| 39 |
-
calc_result = None
|
| 40 |
if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
if file_result:
|
| 46 |
-
tool_context += f"\nFile content: {file_result}"
|
| 47 |
-
if web_result:
|
| 48 |
-
tool_context += f"\nWeb search result: {web_result}"
|
| 49 |
-
if calc_result:
|
| 50 |
-
tool_context += f"\nCalculation result: {calc_result}"
|
| 51 |
-
|
| 52 |
-
# Step 3: Generate the prompt for LLM
|
| 53 |
prompt = f"""
|
| 54 |
-
You are an
|
| 55 |
-
1.
|
| 56 |
-
2. Combine tools
|
| 57 |
-
3. Return
|
| 58 |
-
4. If you cannot answer,
|
| 59 |
5. Do not hallucinate or guess.
|
| 60 |
|
| 61 |
Question: {question}
|
|
@@ -73,7 +63,7 @@ Answer:"""
|
|
| 73 |
def query_llm(self, prompt: str) -> str:
|
| 74 |
completion = self.client.chat.completions.create(
|
| 75 |
model=self.model,
|
| 76 |
-
messages=[{"role": "user", "content": prompt}],
|
| 77 |
temperature=self.temperature,
|
| 78 |
max_tokens=self.max_tokens
|
| 79 |
)
|
|
|
|
| 3 |
import requests
|
| 4 |
import re
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
| 6 |
from openai import OpenAI
|
| 7 |
from tools.web_searcher import web_search_tool
|
| 8 |
from tools.calculator import calculator_tool
|
| 9 |
from tools.file_reader import read_file_tool
|
| 10 |
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
| 20 |
self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 21 |
self.model = "gpt-4o"
|
| 22 |
self.temperature = 0.0
|
| 23 |
+
self.max_tokens = 800
|
| 24 |
self.web_search = web_search_tool
|
| 25 |
self.calculator = calculator_tool
|
| 26 |
self.file_reader = read_file_tool
|
| 27 |
|
| 28 |
def answer(self, question: str, task_id: str = None) -> str:
|
| 29 |
try:
|
| 30 |
+
tool_results = []
|
| 31 |
+
|
| 32 |
if task_id:
|
| 33 |
+
tool_results.append(("File", self.file_reader.run(task_id)))
|
| 34 |
|
|
|
|
| 35 |
if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
|
| 36 |
+
tool_results.append(("Web", self.web_search.run(question)))
|
| 37 |
|
|
|
|
| 38 |
if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
|
| 39 |
+
tool_results.append(("Calc", self.calculator.run(question)))
|
| 40 |
+
|
| 41 |
+
tool_context = "\n".join([f"{name} result: {result}" for name, result in tool_results if result])
|
| 42 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
prompt = f"""
|
| 44 |
+
You are an advanced AI agent answering complex questions. Follow these strict rules:
|
| 45 |
+
1. Always use the available tools for factual and verified answers.
|
| 46 |
+
2. Combine multiple tools when necessary.
|
| 47 |
+
3. Return the answer in the requested format only (e.g., single name, city, number, or code). No explanations.
|
| 48 |
+
4. If you cannot answer, say exactly: 'No answer found.'
|
| 49 |
5. Do not hallucinate or guess.
|
| 50 |
|
| 51 |
Question: {question}
|
|
|
|
| 63 |
def query_llm(self, prompt: str) -> str:
|
| 64 |
completion = self.client.chat.completions.create(
|
| 65 |
model=self.model,
|
| 66 |
+
messages=[{"role": "user", "content": prompt.strip()}],
|
| 67 |
temperature=self.temperature,
|
| 68 |
max_tokens=self.max_tokens
|
| 69 |
)
|
tools/file_reader.py
CHANGED
|
@@ -1,55 +1,54 @@
|
|
| 1 |
import requests
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
import PyPDF2
|
| 4 |
-
import json
|
| 5 |
-
from io import BytesIO
|
| 6 |
from langchain.tools import Tool
|
| 7 |
|
| 8 |
def read_file(task_id: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
try:
|
| 10 |
url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
|
| 11 |
-
response = requests.get(url)
|
| 12 |
response.raise_for_status()
|
| 13 |
|
| 14 |
content_type = response.headers.get("Content-Type", "").lower()
|
| 15 |
-
file_bytes = BytesIO(response.content)
|
| 16 |
|
| 17 |
-
if "excel" in content_type or task_id.endswith((".
|
| 18 |
df = pd.read_excel(file_bytes)
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
elif "csv" in content_type or task_id.endswith(".csv"):
|
| 22 |
df = pd.read_csv(file_bytes)
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
elif "pdf" in content_type or task_id.endswith(".pdf"):
|
| 26 |
pdf = PyPDF2.PdfReader(file_bytes)
|
| 27 |
text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
|
| 28 |
return text.strip() if text else "No text extracted from PDF."
|
| 29 |
|
| 30 |
-
elif
|
| 31 |
-
|
| 32 |
-
return json.dumps(data, indent=2)
|
| 33 |
-
|
| 34 |
-
elif any(task_id.endswith(ext) for ext in [".txt", ".py", ".md"]):
|
| 35 |
-
return response.content.decode("utf-8", errors="ignore")
|
| 36 |
|
| 37 |
else:
|
| 38 |
-
return response.content.decode(
|
| 39 |
|
| 40 |
except Exception as e:
|
| 41 |
return f"[File reader error: {e}]"
|
| 42 |
|
| 43 |
-
def summarize_dataframe(df: pd.DataFrame) -> str:
|
| 44 |
-
numeric_cols = df.select_dtypes(include='number').columns
|
| 45 |
-
if not numeric_cols.empty:
|
| 46 |
-
total = df[numeric_cols].sum().sum()
|
| 47 |
-
return f"Total of numeric columns: {total:.2f}"
|
| 48 |
-
else:
|
| 49 |
-
return df.head().to_string(index=False)
|
| 50 |
-
|
| 51 |
read_file_tool = Tool.from_function(
|
| 52 |
name="read_file",
|
| 53 |
-
description="Reads the content of a file based on
|
| 54 |
func=read_file
|
| 55 |
)
|
|
|
|
| 1 |
import requests
|
| 2 |
import pandas as pd
|
| 3 |
+
import io
|
| 4 |
import PyPDF2
|
|
|
|
|
|
|
| 5 |
from langchain.tools import Tool
|
| 6 |
|
| 7 |
def read_file(task_id: str) -> str:
    """
    Download and read the content of a file by task_id from the evaluation server.

    Handles Excel, CSV, PDF, and plain-text/code files. Tabular files (Excel/CSV)
    that contain numeric columns are summarized as the grand total of all numeric
    cells; otherwise the full table is rendered as text.

    Args:
        task_id: Identifier of the file on the scoring server. Its extension
            (if any) is used as a fallback when the Content-Type is ambiguous.

    Returns:
        The extracted or summarized file content as a string, or an error
        message of the form "[File reader error: ...]" on any failure.
    """

    def _summarize_dataframe(df: pd.DataFrame) -> str:
        # Shared Excel/CSV summary: sum every numeric cell when numeric
        # columns exist, otherwise render the whole table as text.
        numeric_cols = df.select_dtypes(include='number').columns
        if not numeric_cols.empty:
            total = df[numeric_cols].sum().sum()
            return f"{total:.2f}"
        return df.to_string(index=False, header=True)

    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=15)  # bounded wait on the server
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        file_bytes = io.BytesIO(response.content)

        if "excel" in content_type or task_id.endswith((".xlsx", ".xls")):
            return _summarize_dataframe(pd.read_excel(file_bytes))

        elif "csv" in content_type or task_id.endswith(".csv"):
            return _summarize_dataframe(pd.read_csv(file_bytes))

        elif "pdf" in content_type or task_id.endswith(".pdf"):
            pdf = PyPDF2.PdfReader(file_bytes)
            text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
            return text.strip() if text else "No text extracted from PDF."

        elif any(task_id.endswith(ext) for ext in [".txt", ".py", ".md", ".json"]):
            return response.content.decode('utf-8', errors='ignore').strip()

        else:
            # Unknown type: best-effort decode as UTF-8 text.
            return response.content.decode('utf-8', errors='ignore').strip()

    except Exception as e:
        # Broad catch is intentional: the agent expects a string result,
        # never a raised exception.
        return f"[File reader error: {e}]"
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# LangChain tool wrapper exposing read_file to the agent.
read_file_tool = Tool.from_function(
    func=read_file,
    name="read_file",
    description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
)
|
tools/web_searcher.py
CHANGED
|
@@ -1,48 +1,44 @@
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
-
from dotenv import load_dotenv
|
| 4 |
from langchain.tools import Tool
|
|
|
|
| 5 |
|
| 6 |
load_dotenv()
|
| 7 |
-
|
| 8 |
-
|
|
|
|
| 9 |
|
| 10 |
def web_search(query: str) -> str:
|
| 11 |
-
if not
|
| 12 |
-
return "Error: SERPER_API_KEY is not set
|
| 13 |
|
| 14 |
-
headers = {
|
| 15 |
-
|
| 16 |
-
"Content-Type": "application/json"
|
| 17 |
-
}
|
| 18 |
|
| 19 |
-
payload = { "q": query }
|
| 20 |
try:
|
| 21 |
-
response = requests.post(
|
| 22 |
response.raise_for_status()
|
| 23 |
data = response.json()
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
results = []
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
else:
|
| 38 |
-
return "No search results found."
|
| 39 |
-
|
| 40 |
-
except Exception as e:
|
| 41 |
-
return f"[Web search error: {e}]"
|
| 42 |
|
| 43 |
web_search_tool = Tool.from_function(
|
| 44 |
name="web_search",
|
| 45 |
-
description="
|
| 46 |
func=web_search
|
| 47 |
)
|
| 48 |
|
|
|
|
| 1 |
import requests
|
| 2 |
import os
|
|
|
|
| 3 |
from langchain.tools import Tool
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
|
| 6 |
load_dotenv()
|
| 7 |
+
|
| 8 |
+
SERPER_API_KEY = os.getenv("SERPER_API_KEY")
|
| 9 |
+
SERPER_API_URL = "https://google.serper.dev/search"
|
| 10 |
|
| 11 |
def web_search(query: str) -> str:
    """Query the Serper.dev Google Search API and return up to three result snippets.

    Always returns a plain string: the newline-joined snippets on success, or a
    bracketed error / "no results" message, so callers never see an exception.
    """
    if not SERPER_API_KEY:
        return "[Error: SERPER_API_KEY is not set]"

    try:
        response = requests.post(
            SERPER_API_URL,
            headers={"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"},
            json={"q": query, "num": 3},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()

        organic = data.get("organic")
        if not organic:
            return "No relevant results found."

        # Keep only entries that actually carry a snippet.
        snippets = [entry.get("snippet") for entry in organic]
        snippets = [s for s in snippets if s]

        return "\n".join(snippets[:3]) if snippets else "No snippet found."

    except requests.exceptions.Timeout:
        # Timeout is checked first: it is a subclass of RequestException.
        return "[Error: Request timed out.]"
    except requests.exceptions.RequestException as e:
        return f"[Error: {e}]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Expose web_search to the agent framework as a LangChain tool.
web_search_tool = Tool.from_function(
    func=web_search,
    name="web_search",
    description="Search the web for information using Google Search.",
)
|
| 44 |
|