Spaces:
Sleeping
Sleeping
improved tools
Browse files- app.py +17 -27
- tools/file_reader.py +22 -23
- tools/web_searcher.py +24 -28
app.py
CHANGED
|
@@ -3,13 +3,13 @@ import gradio as gr
|
|
| 3 |
import requests
|
| 4 |
import re
|
| 5 |
import pandas as pd
|
| 6 |
-
from io import BytesIO
|
| 7 |
-
from dotenv import load_dotenv
|
| 8 |
from openai import OpenAI
|
| 9 |
from tools.web_searcher import web_search_tool
|
| 10 |
from tools.calculator import calculator_tool
|
| 11 |
from tools.file_reader import read_file_tool
|
| 12 |
|
|
|
|
|
|
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
@@ -20,42 +20,32 @@ class ToolUsingAgent:
|
|
| 20 |
self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 21 |
self.model = "gpt-4o"
|
| 22 |
self.temperature = 0.0
|
| 23 |
-
self.max_tokens =
|
| 24 |
self.web_search = web_search_tool
|
| 25 |
self.calculator = calculator_tool
|
| 26 |
self.file_reader = read_file_tool
|
| 27 |
|
| 28 |
def answer(self, question: str, task_id: str = None) -> str:
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
if task_id:
|
| 33 |
-
|
| 34 |
|
| 35 |
-
web_result = None
|
| 36 |
if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
|
| 37 |
-
|
| 38 |
|
| 39 |
-
calc_result = None
|
| 40 |
if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
if file_result:
|
| 46 |
-
tool_context += f"\nFile content: {file_result}"
|
| 47 |
-
if web_result:
|
| 48 |
-
tool_context += f"\nWeb search result: {web_result}"
|
| 49 |
-
if calc_result:
|
| 50 |
-
tool_context += f"\nCalculation result: {calc_result}"
|
| 51 |
-
|
| 52 |
-
# Step 3: Generate the prompt for LLM
|
| 53 |
prompt = f"""
|
| 54 |
-
You are an
|
| 55 |
-
1.
|
| 56 |
-
2. Combine tools
|
| 57 |
-
3. Return
|
| 58 |
-
4. If you cannot answer,
|
| 59 |
5. Do not hallucinate or guess.
|
| 60 |
|
| 61 |
Question: {question}
|
|
@@ -73,7 +63,7 @@ Answer:"""
|
|
| 73 |
def query_llm(self, prompt: str) -> str:
|
| 74 |
completion = self.client.chat.completions.create(
|
| 75 |
model=self.model,
|
| 76 |
-
messages=[{"role": "user", "content": prompt}],
|
| 77 |
temperature=self.temperature,
|
| 78 |
max_tokens=self.max_tokens
|
| 79 |
)
|
|
|
|
| 3 |
import requests
|
| 4 |
import re
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
| 6 |
from openai import OpenAI
|
| 7 |
from tools.web_searcher import web_search_tool
|
| 8 |
from tools.calculator import calculator_tool
|
| 9 |
from tools.file_reader import read_file_tool
|
| 10 |
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
| 20 |
self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 21 |
self.model = "gpt-4o"
|
| 22 |
self.temperature = 0.0
|
| 23 |
+
self.max_tokens = 800
|
| 24 |
self.web_search = web_search_tool
|
| 25 |
self.calculator = calculator_tool
|
| 26 |
self.file_reader = read_file_tool
|
| 27 |
|
| 28 |
def answer(self, question: str, task_id: str = None) -> str:
|
| 29 |
try:
|
| 30 |
+
tool_results = []
|
| 31 |
+
|
| 32 |
if task_id:
|
| 33 |
+
tool_results.append(("File", self.file_reader.run(task_id)))
|
| 34 |
|
|
|
|
| 35 |
if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
|
| 36 |
+
tool_results.append(("Web", self.web_search.run(question)))
|
| 37 |
|
|
|
|
| 38 |
if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
|
| 39 |
+
tool_results.append(("Calc", self.calculator.run(question)))
|
| 40 |
+
|
| 41 |
+
tool_context = "\n".join([f"{name} result: {result}" for name, result in tool_results if result])
|
| 42 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
prompt = f"""
|
| 44 |
+
You are an advanced AI agent answering complex questions. Follow these strict rules:
|
| 45 |
+
1. Always use the available tools for factual and verified answers.
|
| 46 |
+
2. Combine multiple tools when necessary.
|
| 47 |
+
3. Return the answer in the requested format only (e.g., single name, city, number, or code). No explanations.
|
| 48 |
+
4. If you cannot answer, say exactly: 'No answer found.'
|
| 49 |
5. Do not hallucinate or guess.
|
| 50 |
|
| 51 |
Question: {question}
|
|
|
|
| 63 |
def query_llm(self, prompt: str) -> str:
|
| 64 |
completion = self.client.chat.completions.create(
|
| 65 |
model=self.model,
|
| 66 |
+
messages=[{"role": "user", "content": prompt.strip()}],
|
| 67 |
temperature=self.temperature,
|
| 68 |
max_tokens=self.max_tokens
|
| 69 |
)
|
tools/file_reader.py
CHANGED
|
@@ -1,55 +1,54 @@
|
|
| 1 |
import requests
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
import PyPDF2
|
| 4 |
-
import json
|
| 5 |
-
from io import BytesIO
|
| 6 |
from langchain.tools import Tool
|
| 7 |
|
| 8 |
def read_file(task_id: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
try:
|
| 10 |
url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
|
| 11 |
-
response = requests.get(url)
|
| 12 |
response.raise_for_status()
|
| 13 |
|
| 14 |
content_type = response.headers.get("Content-Type", "").lower()
|
| 15 |
-
file_bytes = BytesIO(response.content)
|
| 16 |
|
| 17 |
-
if "excel" in content_type or task_id.endswith((".
|
| 18 |
df = pd.read_excel(file_bytes)
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
elif "csv" in content_type or task_id.endswith(".csv"):
|
| 22 |
df = pd.read_csv(file_bytes)
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
elif "pdf" in content_type or task_id.endswith(".pdf"):
|
| 26 |
pdf = PyPDF2.PdfReader(file_bytes)
|
| 27 |
text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
|
| 28 |
return text.strip() if text else "No text extracted from PDF."
|
| 29 |
|
| 30 |
-
elif
|
| 31 |
-
|
| 32 |
-
return json.dumps(data, indent=2)
|
| 33 |
-
|
| 34 |
-
elif any(task_id.endswith(ext) for ext in [".txt", ".py", ".md"]):
|
| 35 |
-
return response.content.decode("utf-8", errors="ignore")
|
| 36 |
|
| 37 |
else:
|
| 38 |
-
return response.content.decode(
|
| 39 |
|
| 40 |
except Exception as e:
|
| 41 |
return f"[File reader error: {e}]"
|
| 42 |
|
| 43 |
-
def summarize_dataframe(df: pd.DataFrame) -> str:
|
| 44 |
-
numeric_cols = df.select_dtypes(include='number').columns
|
| 45 |
-
if not numeric_cols.empty:
|
| 46 |
-
total = df[numeric_cols].sum().sum()
|
| 47 |
-
return f"Total of numeric columns: {total:.2f}"
|
| 48 |
-
else:
|
| 49 |
-
return df.head().to_string(index=False)
|
| 50 |
-
|
| 51 |
read_file_tool = Tool.from_function(
|
| 52 |
name="read_file",
|
| 53 |
-
description="Reads the content of a file based on
|
| 54 |
func=read_file
|
| 55 |
)
|
|
|
|
| 1 |
import requests
|
| 2 |
import pandas as pd
|
| 3 |
+
import io
|
| 4 |
import PyPDF2
|
|
|
|
|
|
|
| 5 |
from langchain.tools import Tool
|
| 6 |
|
| 7 |
def read_file(task_id: str) -> str:
    """
    Download and read the content of a file by task_id from the evaluation server.

    Handles Excel, CSV, PDF, and plain-text/code files. Tabular files (Excel/CSV)
    that contain numeric columns are summarized as the grand total of all numeric
    cells; otherwise the full table is rendered as text.

    Args:
        task_id: Identifier of the file on the scoring server. Its extension
            (if any) is used as a fallback when the Content-Type is ambiguous.

    Returns:
        The extracted or summarized file content as a string, or an error
        message of the form "[File reader error: ...]" on any failure.
    """

    def _summarize_dataframe(df: pd.DataFrame) -> str:
        # Shared Excel/CSV summary: sum every numeric cell when numeric
        # columns exist, otherwise render the whole table as text.
        numeric_cols = df.select_dtypes(include='number').columns
        if not numeric_cols.empty:
            total = df[numeric_cols].sum().sum()
            return f"{total:.2f}"
        return df.to_string(index=False, header=True)

    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=15)  # bounded wait on the server
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        file_bytes = io.BytesIO(response.content)

        if "excel" in content_type or task_id.endswith((".xlsx", ".xls")):
            return _summarize_dataframe(pd.read_excel(file_bytes))

        elif "csv" in content_type or task_id.endswith(".csv"):
            return _summarize_dataframe(pd.read_csv(file_bytes))

        elif "pdf" in content_type or task_id.endswith(".pdf"):
            pdf = PyPDF2.PdfReader(file_bytes)
            text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
            return text.strip() if text else "No text extracted from PDF."

        elif any(task_id.endswith(ext) for ext in [".txt", ".py", ".md", ".json"]):
            return response.content.decode('utf-8', errors='ignore').strip()

        else:
            # Unknown type: best-effort decode as UTF-8 text.
            return response.content.decode('utf-8', errors='ignore').strip()

    except Exception as e:
        # Broad catch is intentional: the agent expects a string result,
        # never a raised exception.
        return f"[File reader error: {e}]"
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# LangChain tool wrapper exposing read_file to the agent.
read_file_tool = Tool.from_function(
    func=read_file,
    name="read_file",
    description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
)
|
tools/web_searcher.py
CHANGED
|
@@ -1,48 +1,44 @@
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
-
from dotenv import load_dotenv
|
| 4 |
from langchain.tools import Tool
|
|
|
|
| 5 |
|
| 6 |
load_dotenv()
|
| 7 |
-
|
| 8 |
-
|
|
|
|
| 9 |
|
| 10 |
def web_search(query: str) -> str:
|
| 11 |
-
if not
|
| 12 |
-
return "Error: SERPER_API_KEY is not set
|
| 13 |
|
| 14 |
-
headers = {
|
| 15 |
-
|
| 16 |
-
"Content-Type": "application/json"
|
| 17 |
-
}
|
| 18 |
|
| 19 |
-
payload = { "q": query }
|
| 20 |
try:
|
| 21 |
-
response = requests.post(
|
| 22 |
response.raise_for_status()
|
| 23 |
data = response.json()
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
results = []
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
else:
|
| 38 |
-
return "No search results found."
|
| 39 |
-
|
| 40 |
-
except Exception as e:
|
| 41 |
-
return f"[Web search error: {e}]"
|
| 42 |
|
| 43 |
web_search_tool = Tool.from_function(
|
| 44 |
name="web_search",
|
| 45 |
-
description="
|
| 46 |
func=web_search
|
| 47 |
)
|
| 48 |
|
|
|
|
| 1 |
import requests
|
| 2 |
import os
|
|
|
|
| 3 |
from langchain.tools import Tool
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
|
| 6 |
load_dotenv()
|
| 7 |
+
|
| 8 |
+
SERPER_API_KEY = os.getenv("SERPER_API_KEY")
|
| 9 |
+
SERPER_API_URL = "https://google.serper.dev/search"
|
| 10 |
|
| 11 |
def web_search(query: str) -> str:
    """Query the Serper.dev Google Search API and return up to three result snippets.

    Always returns a plain string: the newline-joined snippets on success, or a
    bracketed error / "no results" message, so callers never see an exception.
    """
    if not SERPER_API_KEY:
        return "[Error: SERPER_API_KEY is not set]"

    try:
        response = requests.post(
            SERPER_API_URL,
            headers={"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"},
            json={"q": query, "num": 3},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()

        organic = data.get("organic")
        if not organic:
            return "No relevant results found."

        # Keep only entries that actually carry a snippet.
        snippets = [entry.get("snippet") for entry in organic]
        snippets = [s for s in snippets if s]

        return "\n".join(snippets[:3]) if snippets else "No snippet found."

    except requests.exceptions.Timeout:
        # Timeout is checked first: it is a subclass of RequestException.
        return "[Error: Request timed out.]"
    except requests.exceptions.RequestException as e:
        return f"[Error: {e}]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Expose web_search to the agent framework as a LangChain tool.
web_search_tool = Tool.from_function(
    func=web_search,
    name="web_search",
    description="Search the web for information using Google Search.",
)
|
| 44 |
|