santimber committed on
Commit
973f005
·
1 Parent(s): 580858a

changes, new tools

Browse files
Files changed (4) hide show
  1. __pycache__/tools.cpython-311.pyc +0 -0
  2. app.py +109 -24
  3. requirements.txt +4 -4
  4. tools.py +397 -180
__pycache__/tools.cpython-311.pyc CHANGED
Binary files a/__pycache__/tools.cpython-311.pyc and b/__pycache__/tools.cpython-311.pyc differ
 
app.py CHANGED
@@ -11,14 +11,14 @@ from langgraph.graph.message import add_messages
11
  from typing import TypedDict, Annotated
12
  from tools import (
13
  image_recognition_tool,
14
- download_file_tool,
15
  reverse_text_tool,
16
- hub_stats_tool,
17
- web_search_tool,
18
  python_execution_tool,
19
  video_analysis_tool,
20
  audio_processing_tool,
21
- file_type_detection_tool
 
22
  )
23
 
24
  # (Keep Constants as is)
@@ -28,15 +28,15 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
28
  # Setting up the llm
29
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
30
  tools = [
31
- web_search_tool,
32
- hub_stats_tool,
33
- download_file_tool,
34
  image_recognition_tool,
35
  reverse_text_tool,
36
  python_execution_tool,
37
  video_analysis_tool,
38
  audio_processing_tool,
39
- file_type_detection_tool
 
40
  ]
41
  chat_with_tools = llm.bind_tools(tools)
42
 
@@ -47,18 +47,79 @@ class MyAgent(TypedDict):
47
  messages: Annotated[list[AnyMessage], add_messages]
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  class AgentWrapper:
51
  def __init__(self):
52
  print("AgentWrapper initialized.")
53
 
54
- def __call__(self, question: str) -> str:
55
- print(f"Agent received question (first 50 chars): {question[:50]}...")
56
-
57
- # Use the LangGraph agent to process the question
 
 
 
 
 
58
  try:
59
  result = my_agent.invoke(
60
- {"messages": [HumanMessage(content=question)]})
61
- # Get the last message from the result
62
  last_message = result["messages"][-1]
63
  answer = last_message.content
64
  print(f"Agent returning answer: {answer}")
@@ -78,16 +139,40 @@ Your answer should be a number OR as few words as possible OR a comma-separated
78
  If you're asked for a number, don't use commas or units like $ or %, unless specified.
79
  If you're asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless told otherwise.
80
 
81
- Tool Use Guidelines:
82
- 1. Do **not** use any tools outside of the provided tools list.
83
- 2. Always use **only one tool at a time** in each step of your execution.
84
- 3. For HuggingFace Hub information (models, authors, downloads), use **get_hub_stats** tool.
85
- 4. For web searches and current information, use **web_search_tool** .
86
- 5. If the question looks reversed (starts with a period or reads backward), first use **reverse_text_tool** to reverse it, then process the question.
87
- 6. When you need to download files from URLs, use **download_file_tool**.
88
- 7. For image analysis and description, use **image_recognition_tool** (requires OpenAI API key).
89
- 8. Even for complex tasks, assume a solution exists. If one method fails, try another approach using different tools.
90
- 9. Keep responses concise and efficient.""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  # Combine system message with user messages
93
  all_messages = [system_message] + state["messages"]
 
11
  from typing import TypedDict, Annotated
12
  from tools import (
13
  image_recognition_tool,
14
+ download_file,
15
  reverse_text_tool,
16
+ serp_search_tool,
 
17
  python_execution_tool,
18
  video_analysis_tool,
19
  audio_processing_tool,
20
+ file_type_detection_tool,
21
+ read_file_tool
22
  )
23
 
24
  # (Keep Constants as is)
 
28
  # Setting up the llm
29
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
30
  tools = [
31
+ serp_search_tool,
32
+ download_file,
 
33
  image_recognition_tool,
34
  reverse_text_tool,
35
  python_execution_tool,
36
  video_analysis_tool,
37
  audio_processing_tool,
38
+ file_type_detection_tool,
39
+ read_file_tool
40
  ]
41
  chat_with_tools = llm.bind_tools(tools)
42
 
 
47
  messages: Annotated[list[AnyMessage], add_messages]
48
 
49
 
50
+ # =========================
51
+ # File Handling Functions
52
+ # =========================
53
def process_question_with_files(question_data: dict) -> str:
    """
    Build the prompt text for a question that may have an attached file.

    When the question names a file, the file is downloaded to /tmp, passed
    through the tool matching its extension, and the tool output is appended
    to the question text in a bracketed section.

    Args:
        question_data (dict): Mapping read for the 'question' and 'file_name' keys.
    Returns:
        str: The question text alone when no file is attached; otherwise the
            question text followed by the file analysis, or by a bracketed
            note when the download or processing failed.
    """
    # Local import so this helper does not depend on the module's import block.
    import os

    question_text = question_data.get('question', '')
    file_name = question_data.get('file_name', '')

    if not file_name:
        return question_text

    print(f"📎 Processing question with attached file: {file_name}")

    # file_name comes from the remote API: keep only its last path component
    # so a value such as "../../etc/x" cannot write outside /tmp.
    safe_name = os.path.basename(file_name)
    if not safe_name:
        return f"{question_text}\n\n[Note: Could not download attached file {file_name}]"
    local_file_path = f"/tmp/{safe_name}"

    try:
        # Download the file from the API
        file_url = f"{DEFAULT_API_URL}/files/{file_name}"
        print(f"📥 Downloading file from: {file_url}")
        download_result = download_file(file_url, local_file_path)
        print(f"📥 Download result: {download_result}")
        if "Failed to download" in download_result:
            return f"{question_text}\n\n[Note: Could not download attached file {file_name}]"

        # Choose the processing tool from the file extension
        file_extension = file_name.lower().split('.')[-1]
        if file_extension in ['png', 'jpg', 'jpeg', 'gif', 'bmp']:
            print(f"🖼️ Processing image file: {file_name}")
            image_result = image_recognition_tool.invoke(local_file_path)
            enhanced_question = f"{question_text}\n\n[Image Analysis: {image_result}]"
        elif file_extension in ['mp3', 'wav', 'm4a', 'flac', 'ogg']:
            print(f"🎵 Processing audio file: {file_name}")
            audio_result = audio_processing_tool.invoke(local_file_path)
            enhanced_question = f"{question_text}\n\n[Audio Transcription: {audio_result}]"
        elif file_extension in ['xls', 'xlsx', 'csv']:
            print(f"📊 Processing spreadsheet file: {file_name}")
            file_content = read_file_tool.invoke(local_file_path)
            enhanced_question = f"{question_text}\n\n[Spreadsheet Content: {file_content}]"
        elif file_extension == 'py':
            print(f"🐍 Processing Python file: {file_name}")
            code_content = read_file_tool.invoke(local_file_path)
            enhanced_question = f"{question_text}\n\n[Python Code: {code_content}]"
        else:
            print(f"📄 Processing general file: {file_name}")
            file_content = read_file_tool.invoke(local_file_path)
            enhanced_question = f"{question_text}\n\n[File Content: {file_content}]"
        return enhanced_question
    except Exception as e:
        print(f"❌ Error processing file {file_name}: {e}")
        return f"{question_text}\n\n[Note: Error processing attached file {file_name}: {str(e)}]"
    finally:
        # Best-effort cleanup on every exit path, including tool failures.
        try:
            os.remove(local_file_path)
            print(f"🧹 Cleaned up temporary file: {local_file_path}")
        except FileNotFoundError:
            pass  # nothing was written (e.g. the download failed)
        except OSError as cleanup_error:
            print(f"⚠️ Could not remove temporary file {local_file_path}: {cleanup_error}")
105
+
106
+
107
  class AgentWrapper:
108
  def __init__(self):
109
  print("AgentWrapper initialized.")
110
 
111
+ def __call__(self, question_data: dict | str) -> str:
112
+ if isinstance(question_data, str):
113
+ question_text = question_data
114
+ print(
115
+ f"Agent received question (first 50 chars): {question_text[:50]}...")
116
+ else:
117
+ question_text = process_question_with_files(question_data)
118
+ print(
119
+ f"Agent received enhanced question (first 50 chars): {question_text[:50]}...")
120
  try:
121
  result = my_agent.invoke(
122
+ {"messages": [HumanMessage(content=question_text)]})
 
123
  last_message = result["messages"][-1]
124
  answer = last_message.content
125
  print(f"Agent returning answer: {answer}")
 
139
  If you're asked for a number, don't use commas or units like $ or %, unless specified.
140
  If you're asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless told otherwise.
141
 
142
+ IMPORTANT: You have a maximum of 3 tool uses per question. After 3 tool uses, you MUST provide your best answer based on available information.
143
+
144
+ DECISION MAKING:
145
+ 1. First, try to answer from your knowledge if it's a general fact.
146
+ 2. If you need specific, current, or detailed information, use serp_search_tool ONCE.
147
+ 3. If the question looks reversed (starts with a period), use reverse_text_tool ONCE first.
148
+ 4. For file-based questions, use the appropriate file tool.
149
+ 5. After using a tool, analyze the result and provide your final answer.
150
+ 6. Do NOT cycle between tools unnecessarily.
151
+
152
+ Tool Use Guidelines:
153
+ 1. Do **not** use any tools outside of the provided tools list.
154
+ 2. Always use **only one tool at a time** in each step of your execution.
155
+ 3. You have a MAXIMUM of 3 tool uses per question.
156
+ 4. For web searches and current information, use **serp_search_tool** (15s timeout).
157
+ 5. If the question looks reversed (starts with a period or reads backward), first use **reverse_text_tool** to reverse it, then process the question.
158
+ 6. For image analysis and description, use **image_recognition_tool** (requires OpenAI API key).
159
+ 7. For Python code execution, use **python_execution_tool**.
160
+ 8. For video analysis, use **video_analysis_tool**.
161
+ 9. For audio processing, use **audio_processing_tool**.
162
+ 10. For file type detection, use **file_type_detection_tool**.
163
+ 11. For reading file contents, use **read_file_tool**.
164
+ 12. File downloading is handled automatically - you don't need to download files manually.
165
+ 13. Keep responses concise and efficient.
166
+ 14. If you can't find the answer after using 2-3 tools, provide your best estimate based on available information.
167
+ 15. NEVER use more than 3 tools for a single question.
168
+ 16. After using a tool, provide your final answer immediately.
169
+
170
+ FILE PROCESSING:
171
+ - Questions may come with attached files (mp3, excel, images, etc.)
172
+ - File processing is automatically handled before the question reaches you
173
+ - If a file is attached, you'll see the file content/analysis in the question
174
+ - Focus on answering based on the enhanced question that includes file information
175
+ - You don't need to process files manually - just use the information provided""")
176
 
177
  # Combine system message with user messages
178
  all_messages = [system_message] + state["messages"]
requirements.txt CHANGED
@@ -11,10 +11,10 @@ transformers
11
  torch
12
  pillow
13
  python-dotenv
14
- duckduckgo-search
15
  pandas
16
  datasets
17
  langchain-huggingface
18
- pathlib
19
- SpeechRecognition
20
- librosa
 
 
11
  torch
12
  pillow
13
  python-dotenv
 
14
  pandas
15
  datasets
16
  langchain-huggingface
17
+ pydub
18
+ google-search-results
19
+ pytesseract
20
+ numpy
tools.py CHANGED
@@ -1,76 +1,151 @@
1
- from io import BytesIO
2
- import requests
3
- from PIL import Image as PILImage
4
- from transformers import BlipProcessor, BlipForConditionalGeneration
5
- from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
6
- from huggingface_hub import list_models
7
- import random
8
- import pprint
9
- from langchain_community.tools import DuckDuckGoSearchRun
10
- from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
11
- from langgraph.prebuilt import tools_condition
12
- from langgraph.graph import START, StateGraph
13
-
14
- from langgraph.prebuilt import ToolNode
15
- from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
16
- from langgraph.graph.message import add_messages
17
- from typing import TypedDict, Annotated
18
- from langchain.tools import Tool
19
- from langchain_community.retrievers import BM25Retriever
20
- from langchain.docstore.document import Document
21
- import datasets
22
- from langchain_openai import ChatOpenAI
23
- from dotenv import load_dotenv
24
  import os
25
- import torch
26
- import base64
27
  import subprocess
28
  import tempfile
29
- import json
30
- import re
31
  from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Load environment variables
34
- load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- # DEFINE HUB STAT TOOLS
37
 
 
 
 
 
 
38
 
39
- def get_hub_stats(author: str) -> str:
40
- """Fetches the most downloaded model from a specific author on the Hugging Face Hub."""
41
- try:
42
- # List models from the specified author, sorted by downloads
43
- models = list(list_models(
44
- author=author, sort="downloads", direction=-1, limit=1))
45
 
46
- if models:
47
- model = models[0]
48
- return f"The most downloaded model by {author} is {model.id} with {model.downloads:,} downloads."
49
- else:
50
- return f"No models found for author {author}."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  except Exception as e:
52
- return f"Error fetching models for {author}: {str(e)}"
53
 
54
 
55
- # Initialize the tool
56
- hub_stats_tool = Tool(
57
- name="get_hub_stats",
58
- func=get_hub_stats,
59
- description="Search HuggingFace Hub for model statistics, downloads, and author information. Use this when asking about specific models, authors, or HuggingFace Hub data."
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- # DEFINE WEB SEARCH TOOLS
63
- web_search_tool = Tool(
64
- name="search_tool",
65
- func=DuckDuckGoSearchRun(),
66
- description="Search the general web for current information, news, and general knowledge. Use this for questions about companies, people, events, etc."
67
  )
68
 
69
- # REVERSE TOOLS
 
 
70
 
71
 
72
  def ReverseTextTool(text: str) -> str:
73
- """Reverses the order of characters in a given text string."""
 
 
 
 
 
 
74
  try:
75
  return text[::-1]
76
  except Exception as e:
@@ -83,85 +158,102 @@ reverse_text_tool = Tool(
83
  description="Reverses the order of characters in a given text string. Use this when you need to reverse text."
84
  )
85
 
86
- # DOWNLOAD A FILE
87
-
88
-
89
- def download_file(url: str) -> str:
90
- """Downloads a file from a given URL and returns the local file path."""
91
- try:
92
- response = requests.get(url, timeout=30)
93
- response.raise_for_status()
94
-
95
- # Define save_path - extract filename from URL
96
- filename = url.split(
97
- '/')[-1] if url.split('/')[-1] else 'downloaded_file'
98
- save_path = f"./{filename}"
99
 
100
- with open(save_path, "wb") as f:
101
- f.write(response.content)
102
- return save_path
103
- except Exception as e:
104
- return f"Failed to download: {e}"
105
 
106
 
107
- download_file_tool = Tool(
108
- name="download_file_tool",
109
- func=download_file,
110
- description="Downloads a file from a given URL and returns the local file path."
111
- )
 
 
 
 
 
 
112
 
113
- # PYTHON CODE EXECUTION TOOL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
 
 
115
 
116
- def execute_python_code(code: str) -> str:
117
- """Executes Python code and returns the output."""
118
- try:
119
- # Create a temporary file to execute the code
120
- with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
121
- f.write(code)
122
- temp_file = f.name
123
 
124
- # Execute the code
125
- result = subprocess.run(['python', temp_file],
126
- capture_output=True, text=True, timeout=30)
127
 
128
- # Clean up
129
- os.unlink(temp_file)
 
130
 
131
- if result.returncode == 0:
132
- return f"Output: {result.stdout.strip()}"
133
- else:
134
- return f"Error: {result.stderr.strip()}"
135
- except subprocess.TimeoutExpired:
136
- return "Error: Code execution timed out"
137
  except Exception as e:
138
- return f"Error executing Python code: {str(e)}"
139
 
140
 
141
- python_execution_tool = Tool(
142
- name="python_execution_tool",
143
- func=execute_python_code,
144
- description="Executes Python code and returns the output. Use this when you need to run Python scripts or calculate values."
145
  )
146
 
147
- # VIDEO ANALYSIS TOOL
148
-
149
 
150
  def analyze_video(video_url: str) -> str:
151
- """Analyzes video content from YouTube or other video URLs."""
 
 
 
 
 
 
152
  try:
153
- # Extract video ID from YouTube URL
154
  if 'youtube.com' in video_url or 'youtu.be' in video_url:
155
- # For YouTube videos, we can extract some basic info
156
  video_id = None
157
  if 'youtube.com/watch?v=' in video_url:
158
  video_id = video_url.split('watch?v=')[1].split('&')[0]
159
  elif 'youtu.be/' in video_url:
160
  video_id = video_url.split('youtu.be/')[1].split('?')[0]
161
-
162
  if video_id:
163
- # Use web search to get video information
164
- search_result = web_search_tool.func(
165
  f"youtube video {video_id} title description")
166
  return f"Video analysis for {video_id}: {search_result}"
167
  else:
@@ -178,89 +270,39 @@ video_analysis_tool = Tool(
178
  description="Analyzes video content from URLs. Use this when questions involve video content or YouTube links."
179
  )
180
 
181
- # AUDIO PROCESSING TOOL
182
-
183
-
184
- def process_audio(audio_file_path: str) -> str:
185
- """Processes audio files to extract information."""
186
- try:
187
- if not os.path.exists(audio_file_path):
188
- return f"Audio file not found: {audio_file_path}"
189
-
190
- # For now, return basic file info
191
- file_size = os.path.getsize(audio_file_path)
192
- file_extension = Path(audio_file_path).suffix
193
-
194
- return f"Audio file: {audio_file_path}, Size: {file_size} bytes, Type: {file_extension}. Audio processing requires additional libraries like speech_recognition or librosa."
195
- except Exception as e:
196
- return f"Error processing audio: {str(e)}"
197
-
198
-
199
- audio_processing_tool = Tool(
200
- name="audio_processing_tool",
201
- func=process_audio,
202
- description="Processes audio files to extract information. Use this when questions involve audio files or MP3 content."
203
- )
204
-
205
- # ENHANCED IMAGE RECOGNITION TOOLS
206
-
207
-
208
- def create_vision_llm():
209
- """Creates a vision-capable LLM with proper error handling."""
210
- try:
211
- # Check if OpenAI API key is available
212
- if not os.getenv("OPENAI_API_KEY"):
213
- return None, "OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables."
214
-
215
- vision_llm = ChatOpenAI(model="gpt-4o")
216
- return vision_llm, None
217
- except Exception as e:
218
- return None, f"Error creating vision LLM: {str(e)}"
219
 
220
 
221
  def image_recognition(img_path: str) -> str:
222
- """Analyzes and describes the content of images using AI vision."""
 
 
 
 
 
 
223
  try:
224
- # Check if file exists
225
  if not os.path.exists(img_path):
226
  return f"Error: Image file not found at {img_path}"
227
-
228
- # Create vision LLM
229
- vision_llm, error = create_vision_llm()
230
- if error:
231
- return error
232
-
233
- # Read image and encode as base64
234
  with open(img_path, "rb") as image_file:
235
  image_bytes = image_file.read()
236
-
237
  image_base64 = base64.b64encode(image_bytes).decode("utf-8")
238
-
239
- # Prepare the prompt including the base64 image data
240
  message = [
241
  HumanMessage(
242
  content=[
243
- {
244
- "type": "text",
245
- "text": (
246
- "Describe the image or extract all the text from this image. "
247
- "Return only the description or extracted text, no explanations."
248
- ),
249
- },
250
- {
251
- "type": "image_url",
252
- "image_url": {
253
- "url": f"data:image/png;base64,{image_base64}"
254
- },
255
- },
256
  ]
257
  )
258
  ]
259
-
260
- # Call the vision-capable model
261
  response = vision_llm.invoke(message)
262
  return response.content.strip()
263
-
264
  except Exception as e:
265
  return f"Error analyzing image: {str(e)}"
266
 
@@ -271,18 +313,24 @@ image_recognition_tool = Tool(
271
  description="Analyzes and describes the content of images using AI vision. Use this when you need to understand what's in an image."
272
  )
273
 
274
- # FILE TYPE DETECTION TOOL
 
 
275
 
276
 
277
  def detect_file_type(file_path: str) -> str:
278
- """Detects the type of file and provides appropriate handling suggestions."""
 
 
 
 
 
 
279
  try:
280
  if not os.path.exists(file_path):
281
  return f"File not found: {file_path}"
282
-
283
  file_extension = Path(file_path).suffix.lower()
284
  file_size = os.path.getsize(file_path)
285
-
286
  file_types = {
287
  '.py': 'Python script',
288
  '.mp3': 'Audio file',
@@ -297,7 +345,6 @@ def detect_file_type(file_path: str) -> str:
297
  '.xls': 'Excel spreadsheet',
298
  '.xlsx': 'Excel spreadsheet'
299
  }
300
-
301
  file_type = file_types.get(file_extension, 'Unknown file type')
302
  return f"File: {file_path}, Type: {file_type}, Size: {file_size} bytes"
303
  except Exception as e:
@@ -309,3 +356,173 @@ file_type_detection_tool = Tool(
309
  func=detect_file_type,
310
  description="Detects file types and provides information about files. Use this when you need to understand what type of file you're working with."
311
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================
2
+ # Imports and Environment
3
+ # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import os
5
+ import requests
 
6
  import subprocess
7
  import tempfile
8
+ import base64
9
+ import io
10
  from pathlib import Path
11
+ from dotenv import load_dotenv
12
+ from typing import TypedDict, Annotated
13
+ from huggingface_hub import list_models
14
+ from langchain.tools import Tool
15
+ from langchain_community.utilities import SerpAPIWrapper
16
+ from langchain_core.messages import HumanMessage
17
+ from langchain_huggingface import ChatHuggingFace
18
+ from langchain_openai import ChatOpenAI
19
+ import openai
20
+ from pydub import AudioSegment
21
+ import pandas as pd
22
+ from PIL import Image
23
 
24
  # Load environment variables
25
+ print("Current working directory:", os.getcwd())
26
+
27
+ load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), ".env"))
28
+ # =========================
29
+ # 1. Web Search Tools
30
+ # =========================
31
+
32
+
33
def serp_search(query: str) -> str:
    """
    Run a web search through SerpAPI.

    Args:
        query (str): The search query.
    Returns:
        str: Whatever SerpAPIWrapper.run() returns for the query, or a
            "Search failed: ..." message if anything raises.
    """
    try:
        # A fresh wrapper is built on every call.
        return SerpAPIWrapper().run(query)
    except Exception as err:
        return f"Search failed: {err}"
47
 
 
48
 
49
# Agent-facing wrapper around serp_search().
serp_search_tool = Tool(
    name="serp_search_tool",
    func=serp_search,
    description="Searches the web using SerpAPI and returns the top result."
)
54
 
 
 
 
 
 
 
55
 
56
+ # =========================
57
+ # 2. File Download/Handling Tools
58
+ # =========================
59
+
60
+ # Note: File downloading is now handled in app.py via process_question_with_files()
61
+ # download_file below is the helper that function calls; no download_file_tool wrapper is defined in this module
62
+
63
def download_file(url: str, save_path: str) -> str:
    """
    Downloads a file from a URL and saves it to the given path.

    The body is streamed to disk in chunks, so large files are not held
    in memory, and a partially written file is removed if the transfer fails.

    Args:
        url (str): The URL from which to download the file.
        save_path (str): The local file path where the downloaded file will be saved.
    Returns:
        str: "File downloaded to <save_path>" on success, otherwise a message
            starting with "Failed to download" (callers match on that prefix).
    """
    started_writing = False
    try:
        # 15 second timeout; the context manager releases the connection.
        with requests.get(url, timeout=15, stream=True) as response:
            response.raise_for_status()
            with open(save_path, "wb") as f:
                started_writing = True
                for chunk in response.iter_content(chunk_size=65536):
                    f.write(chunk)
        return f"File downloaded to {save_path}"
    except Exception as e:
        # Only delete the target once we have opened (and truncated) it,
        # so an HTTP error never removes a pre-existing file.
        if started_writing:
            try:
                os.remove(save_path)
            except OSError:
                pass
        return f"Failed to download: {e}"
81
 
82
 
83
+ # download_file is called directly by process_question_with_files() in app.py;
84
+ # no separate download_file_tool wrapper is defined in this module
85
+
86
+ # =========================
87
+ # 3. Python Execution Tools
88
+ # =========================
89
+
90
+
91
def RunPythonFileTool(file_path: str) -> str:
    """
    Executes a Python script loaded from the specified path.

    Uses langchain's PythonInterpreterTool when that import is available;
    otherwise the code is copied to a temporary file and run in a subprocess
    with a 15 second timeout.

    Args:
        file_path (str): The full path to the python (.py) file containing the Python code.
    Returns:
        str: The output produced by the code execution, or an error message if it fails.
    """
    # Local import: sys is not part of this module's import block.
    import sys

    try:
        if not os.path.exists(file_path):
            return f"File not found: {file_path}"
        with open(file_path, "r") as f:
            code = f.read()

        try:
            from langchain.tools.python.tool import PythonInterpreterTool
        except ImportError:
            PythonInterpreterTool = None

        if PythonInterpreterTool is not None:
            result = PythonInterpreterTool().run({"code": code})
            # Tool.run() may hand back a mapping or a plain string.
            if isinstance(result, dict):
                return result.get("output", "No output returned.")
            return str(result) or "No output returned."

        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as temp:
            temp.write(code)
            temp_path = temp.name
        try:
            # Run with the interpreter hosting this process, not whatever
            # "python" happens to resolve to on PATH.
            completed = subprocess.run(
                [sys.executable or "python", temp_path],
                capture_output=True,
                text=True,
                timeout=15
            )
        finally:
            # Remove the temp script on timeouts and errors too.
            os.unlink(temp_path)

        if completed.returncode == 0:
            return completed.stdout.strip() or "No output returned."
        return f"Error: {completed.stderr.strip()}"
    except subprocess.TimeoutExpired:
        return "Error: Code execution timed out"
    except Exception as e:
        return f"Execution failed: {e}"
128
+
129
 
130
# RunPythonFileTool expects the path of an existing .py file, so the
# description tells the model to pass a path rather than raw source code.
python_execution_tool = Tool(
    name="python_execution_tool",
    func=RunPythonFileTool,
    description="Executes a Python (.py) file and returns its output. The input must be the path to an existing .py file, not raw Python code."
)
135
 
136
+ # =========================
137
+ # 4. Text Utilities
138
+ # =========================
139
 
140
 
141
  def ReverseTextTool(text: str) -> str:
142
+ """
143
+ Reverses the order of characters in a given text string.
144
+ Args:
145
+ text (str): The text to reverse.
146
+ Returns:
147
+ str: The reversed text or an error message.
148
+ """
149
  try:
150
  return text[::-1]
151
  except Exception as e:
 
158
  description="Reverses the order of characters in a given text string. Use this when you need to reverse text."
159
  )
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
+ # =========================
163
+ # 5. Audio, Video, and Image Tools
164
+ # =========================
 
 
165
 
166
 
167
def process_audio(audio_file_path: str) -> str:
    """
    Processes audio files to extract information and transcribe speech content.

    MP3 input is converted to WAV in memory before upload; other supported
    formats are uploaded as-is. Transcription uses the OpenAI "whisper-1" model.

    Args:
        audio_file_path (str): Path to the audio file.
    Returns:
        str: Transcription result or file info with error message.
    """
    try:
        if not os.path.exists(audio_file_path):
            return f"Audio file not found: {audio_file_path}"

        file_extension = Path(audio_file_path).suffix.lower()
        file_size = os.path.getsize(audio_file_path)
        # Shared prefix of every non-success message below.
        file_info = f"Audio file: {audio_file_path}, Size: {file_size} bytes, Type: {file_extension}."

        # Check if it's an audio file we can process
        if file_extension not in ['.mp3', '.wav', '.m4a', '.flac', '.ogg']:
            return f"{file_info} Unsupported audio format for transcription."

        # Report a missing key explicitly instead of as a generic
        # transcription error raised by the client constructor.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            return f"{file_info} OpenAI API key not found or invalid. Please set OPENAI_API_KEY in your environment variables."

        try:
            client = openai.OpenAI(api_key=api_key)

            if file_extension == '.mp3':
                audio = AudioSegment.from_mp3(audio_file_path)
                wav_buffer = io.BytesIO()
                audio.export(wav_buffer, format="wav")
                wav_buffer.seek(0)

                # An in-memory buffer has no filename; pass one explicitly so
                # the API can tell the upload is WAV.
                transcription = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=("audio.wav", wav_buffer),
                    response_format="text"
                )
            else:
                # For other formats, try direct transcription
                with open(audio_file_path, "rb") as audio_file:
                    transcription = client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_file,
                        response_format="text"
                    )

            return f"Transcription successful!\nFile: {audio_file_path}\nSize: {file_size} bytes\nType: {file_extension}\n\nTranscription:\n{transcription}"

        except openai.AuthenticationError:
            return f"{file_info} OpenAI API key not found or invalid. Please set OPENAI_API_KEY in your environment variables."

        except openai.BadRequestError as e:
            return f"{file_info} Audio format not supported or file too large: {str(e)}"

        except Exception as e:
            return f"{file_info} Transcription error: {str(e)}"

    except Exception as e:
        return f"Error processing audio: {str(e)}"
231
 
232
 
233
# Agent-facing wrapper around process_audio().
audio_processing_tool = Tool(
    name="audio_processing_tool",
    func=process_audio,
    description="Transcribes audio files (MP3, WAV, M4A, FLAC, OGG) to text using speech recognition. Use this when you need to convert speech in audio files to text."
)
238
 
 
 
239
 
240
  def analyze_video(video_url: str) -> str:
241
+ """
242
+ Analyzes video content from YouTube or other video URLs.
243
+ Args:
244
+ video_url (str): The video URL.
245
+ Returns:
246
+ str: Video analysis or an error message.
247
+ """
248
  try:
 
249
  if 'youtube.com' in video_url or 'youtu.be' in video_url:
 
250
  video_id = None
251
  if 'youtube.com/watch?v=' in video_url:
252
  video_id = video_url.split('watch?v=')[1].split('&')[0]
253
  elif 'youtu.be/' in video_url:
254
  video_id = video_url.split('youtu.be/')[1].split('?')[0]
 
255
  if video_id:
256
+ search_result = serp_search(
 
257
  f"youtube video {video_id} title description")
258
  return f"Video analysis for {video_id}: {search_result}"
259
  else:
 
270
  description="Analyzes video content from URLs. Use this when questions involve video content or YouTube links."
271
  )
272
 
273
+ # =========================
274
+ # 6. Image Recognition Tools
275
+ # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
 
278
  def image_recognition(img_path: str) -> str:
279
+ """
280
+ Analyzes and describes the content of images using AI vision.
281
+ Args:
282
+ img_path (str): Path to the image file.
283
+ Returns:
284
+ str: Description or extracted text, or an error message.
285
+ """
286
  try:
 
287
  if not os.path.exists(img_path):
288
  return f"Error: Image file not found at {img_path}"
289
+ if not os.getenv("OPENAI_API_KEY"):
290
+ return "OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables."
291
+ vision_llm = ChatOpenAI(model="gpt-4o")
 
 
 
 
292
  with open(img_path, "rb") as image_file:
293
  image_bytes = image_file.read()
 
294
  image_base64 = base64.b64encode(image_bytes).decode("utf-8")
 
 
295
  message = [
296
  HumanMessage(
297
  content=[
298
+ {"type": "text", "text": "Describe the image or extract all the text from this image. Return only the description or extracted text, no explanations."},
299
+ {"type": "image_url", "image_url": {
300
+ "url": f"data:image/png;base64,{image_base64}"}},
 
 
 
 
 
 
 
 
 
 
301
  ]
302
  )
303
  ]
 
 
304
  response = vision_llm.invoke(message)
305
  return response.content.strip()
 
306
  except Exception as e:
307
  return f"Error analyzing image: {str(e)}"
308
 
 
313
  description="Analyzes and describes the content of images using AI vision. Use this when you need to understand what's in an image."
314
  )
315
 
316
+ # =========================
317
+ # 7. File Type Detection
318
+ # =========================
319
 
320
 
321
  def detect_file_type(file_path: str) -> str:
322
+ """
323
+ Detects the type of file and provides appropriate handling suggestions.
324
+ Args:
325
+ file_path (str): Path to the file.
326
+ Returns:
327
+ str: File type info or an error message.
328
+ """
329
  try:
330
  if not os.path.exists(file_path):
331
  return f"File not found: {file_path}"
 
332
  file_extension = Path(file_path).suffix.lower()
333
  file_size = os.path.getsize(file_path)
 
334
  file_types = {
335
  '.py': 'Python script',
336
  '.mp3': 'Audio file',
 
345
  '.xls': 'Excel spreadsheet',
346
  '.xlsx': 'Excel spreadsheet'
347
  }
 
348
  file_type = file_types.get(file_extension, 'Unknown file type')
349
  return f"File: {file_path}, Type: {file_type}, Size: {file_size} bytes"
350
  except Exception as e:
 
356
  func=detect_file_type,
357
  description="Detects file types and provides information about files. Use this when you need to understand what type of file you're working with."
358
  )
359
+
360
+ # =========================
361
+ # 8. Enhanced File Reading Tools
362
+ # =========================
363
+
364
+
365
def read_file(file_name: str) -> str:
    """
    Read and process different file types (text, CSV, images).

    The reader is chosen from the file extension: ``.csv`` goes to the CSV
    summarizer, common raster image extensions go to the image reader, and
    everything else is read as text.

    Args:
        file_name (str): Path to the file to read.
    Returns:
        str: The processed content, "File not found" when the path is empty
        or does not exist, or an "Error reading file: ..." message.
    """
    if not (file_name and os.path.exists(file_name)):
        return "File not found"

    try:
        _, suffix = os.path.splitext(file_name)
        suffix = suffix.lower()

        if suffix == ".csv":
            reader = _read_csv_file
        elif suffix in (".png", ".jpg", ".jpeg", ".gif", ".bmp"):
            reader = _read_image_file
        else:
            # Known text extensions and unknown ones alike are read as text.
            reader = _read_text_file
        return reader(file_name)
    except Exception as err:
        return f"Error reading file: {str(err)}"
387
+
388
+
389
+ def _read_text_file(file_name: str) -> str:
390
+ """Read a text file."""
391
+ try:
392
+ with open(file_name, "r", encoding="utf-8") as f:
393
+ content = f.read()
394
+ return content[:5000] # Limit to first 5000 characters
395
+ except UnicodeDecodeError:
396
+ # Try with different encoding
397
+ try:
398
+ with open(file_name, "r", encoding="latin-1") as f:
399
+ content = f.read()
400
+ return content[:5000]
401
+ except Exception as e:
402
+ return f"Text file reading error: {str(e)}"
403
+
404
+
405
+ def _read_csv_file(file_name: str) -> str:
406
+ """Read and summarize a CSV file."""
407
+ try:
408
+ df = pd.read_csv(file_name)
409
+
410
+ # Create a summary
411
+ summary = []
412
+ summary.append(
413
+ f"CSV file shape: {df.shape[0]} rows, {df.shape[1]} columns")
414
+ summary.append(f"Columns: {', '.join(df.columns.tolist())}")
415
+
416
+ # Show first few rows
417
+ summary.append("\nFirst 5 rows:")
418
+ summary.append(df.head().to_string())
419
+
420
+ # Show basic statistics for numeric columns
421
+ numeric_columns = df.select_dtypes(include=['number']).columns
422
+ if len(numeric_columns) > 0:
423
+ summary.append(f"\nNumeric column statistics:")
424
+ summary.append(df[numeric_columns].describe().to_string())
425
+
426
+ return "\n".join(summary)
427
+
428
+ except Exception as e:
429
+ return f"CSV reading error: {str(e)}"
430
+
431
+
432
def _read_image_file(file_name: str) -> str:
    """Describe an image file and, when OCR is available, extract its text.

    The image is opened once inside a ``with`` block so the underlying file
    handle is closed on every path. ``pytesseract`` is imported lazily; when
    it is not installed only the image dimensions and mode are reported.

    Args:
        file_name (str): Path to the image file.
    Returns:
        str: Image size/mode information followed by the OCR result (or a
        note that OCR is unavailable), or an "Image reading error: ..."
        message.
    """
    try:
        with Image.open(file_name) as img:
            # Get image info
            info = f"Image: {img.size[0]}x{img.size[1]} pixels, mode: {img.mode}"

            try:
                import pytesseract
            except ImportError:
                # OCR not available, just return image info
                return f"{info}\n(OCR not available - install pytesseract for text extraction)"

            # Run OCR while the image file is still open.
            text = pytesseract.image_to_string(img).strip()

        if text:
            return f"{info}\n\nExtracted text:\n{text}"
        return f"{info}\n\nNo text detected in image."

    except Exception as e:
        return f"Image reading error: {str(e)}"
457
+
458
+
459
# Tool wrapper that exposes read_file under the name "read_file_tool".
read_file_tool = Tool(
    name="read_file_tool",
    func=read_file,
    description=(
        "Reads and processes different file types including text files, "
        "CSV files, and images. "
        "Use this when you need to extract content from files."
    ),
)
464
+
465
+ # =========================
466
+ # 9. Code Execution and Math Tools
467
+ # =========================
468
+
469
+
470
def _has_blocked_construct(code: str) -> bool:
    """Return True when *code* imports a blocked module or names a blocked builtin."""
    import ast

    blocked_modules = {"os", "subprocess", "importlib"}
    blocked_names = {"__import__", "exec", "eval", "open"}

    for node in ast.walk(ast.parse(code)):
        if isinstance(node, ast.Import):
            # Covers "import os", "import sys, os", "import os.path as p".
            if any(alias.name.split(".")[0] in blocked_modules
                   for alias in node.names):
                return True
        elif isinstance(node, ast.ImportFrom):
            # Covers "from os import system" and "from os.path import join".
            if (node.module or "").split(".")[0] in blocked_modules:
                return True
        elif isinstance(node, ast.Name) and node.id in blocked_names:
            return True
        elif isinstance(node, ast.Attribute) and node.attr in blocked_names:
            return True
    return False


def execute_code(code: str, timeout: int = 5) -> str:
    """
    Execute Python code in a subprocess with a timeout.

    Before running, the code is parsed and rejected when it imports ``os``,
    ``subprocess`` or ``importlib`` or refers to ``exec``, ``eval``, ``open``
    or ``__import__``. Working on the syntax tree avoids the false positives
    of substring matching (e.g. a variable called ``evaluate``) and catches
    import spellings such as ``from os import system``.

    NOTE(review): this is a best-effort filter, not a sandbox. The code is
    still run with the privileges of this process; untrusted input needs
    real isolation (container, separate user, resource limits).

    Args:
        code (str): Python source to run via ``-c``.
        timeout (int): Maximum run time in seconds.
    Returns:
        str: The stripped stdout, a placeholder when there is no output, or
        a message describing the block, timeout or error.
    """
    import sys
    import tempfile

    try:
        # Basic security check - prevent dangerous operations
        if _has_blocked_construct(code):
            return "Code execution blocked: potentially unsafe operations detected"

        result = subprocess.run(
            # Use the running interpreter rather than whatever "python3"
            # happens to resolve to on PATH.
            [sys.executable or "python3", "-c", code],
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=tempfile.gettempdir()  # Run in the system temp directory
        )

        if result.returncode == 0:
            return result.stdout.strip() if result.stdout else "Code executed successfully (no output)"
        return f"Code execution error: {result.stderr.strip()}"

    except subprocess.TimeoutExpired:
        return "Code execution timeout"
    except Exception as e:
        # Includes SyntaxError raised while parsing the submitted code.
        return f"Code execution error: {str(e)}"
498
+
499
+
500
def calculate_simple_math(expression: str) -> str:
    """
    Safely evaluate simple mathematical expressions.

    Only digits, ``+ - * / . ( )`` and spaces are accepted. The expression
    is parsed and evaluated by walking its syntax tree with a fixed set of
    arithmetic operators instead of calling ``eval``. Because the allowed
    characters still spell ``**``, integer powers are bounded so an input
    such as ``9**9**9**9`` returns an error instead of hanging the process.

    Args:
        expression (str): The arithmetic expression to evaluate.
    Returns:
        str: The result as text, "Invalid mathematical expression" when a
        disallowed character is present, or a
        "Math calculation error: ..." message.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Upper bound on the estimated bit size of an integer power result.
    max_pow_bits = 1_000_000

    def evaluate(node):
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (isinstance(node.op, ast.Pow)
                    and isinstance(left, int) and isinstance(right, int)
                    and right > 0
                    and left.bit_length() * right > max_pow_bits):
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("unsupported expression")

    try:
        # Only allow basic math characters
        allowed_chars = set("0123456789+-*/.() ")
        if not all(c in allowed_chars for c in expression):
            return "Invalid mathematical expression"

        # Strip surrounding spaces: eval-mode parsing rejects leading indent.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree))

    except Exception as e:
        return f"Math calculation error: {str(e)}"
516
+
517
+
518
# Tool wrapper that exposes execute_code under the name "code_execution_tool".
code_execution_tool = Tool(
    name="code_execution_tool",
    func=execute_code,
    description=(
        "Executes Python code safely with timeout and security checks. "
        "Use this when you need to run small Python code snippets."
    ),
)

# Tool wrapper that exposes calculate_simple_math as "math_calculation_tool".
math_calculation_tool = Tool(
    name="math_calculation_tool",
    func=calculate_simple_math,
    description=(
        "Safely evaluates simple mathematical expressions. "
        "Use this when you need to perform basic math calculations."
    ),
)