Final_Assignment_Agent / test_multimedia.py
Markus Clauss DIRU Vetsuisse
First agent traila
1637cd5
#!/usr/bin/env python3
"""
Test multimedia handling for GAIA agent
"""
import os
from dotenv import load_dotenv
# Load environment variables (via python-dotenv) before importing the agent.
# NOTE(review): presumably `app` reads configuration such as API keys from the
# environment at import time, which is why this call precedes the import — confirm.
load_dotenv()
# Import the agent under test
from app import LangGraphAgent
def test_multimedia_questions():
    """Run the agent on a fixed set of multimedia questions and print a report.

    Each question is passed to ``agent.run`` and the answer is printed along
    with a pass/fail verdict:

    * YouTube questions pass when the answer does NOT contain
      "Unable to determine".
    * Questions about attached files (image, Excel, audio, PDF) pass when the
      answer DOES contain "Unable to determine".

    An exception raised while handling a single question is caught and
    printed so the remaining questions still run. Constructing the agent is
    not guarded, so a failure there propagates to the caller.

    The function only prints; it makes no assertions and returns ``None``.
    """
    print("Testing GAIA agent with multimedia questions...")
    print("=" * 80)

    # Initialize agent
    agent = LangGraphAgent()

    # Test questions from the GAIA benchmark that involve multimedia
    test_questions = [
        # YouTube video question
        {
            "question": 'In the video https://www.youtube.com/watch?v=1htKBjuUWec, Verma claims the existence of "a "moat" in the education system that provides a systemic advantage for those who know about it and can get into the pipeline." Verma\'s "moat" is a well-known advantage for students. What is the four-letter abbreviation used to describe this systemic advantage?',
            "expected": "Should extract transcript and find STEM",
        },
        # Image question (should return "Unable to determine")
        {
            "question": "Look at the attached image and tell me what color is the car?",
            "expected": "Unable to determine without access to image files",
        },
        # Excel file question (should return "Unable to determine")
        {
            "question": "What is the sum of all values in column B of the attached Excel file?",
            "expected": "Unable to determine without access to Excel files",
        },
        # Audio question (should return "Unable to determine")
        {
            "question": "What song is playing in the attached audio file?",
            "expected": "Unable to determine without access to audio files",
        },
        # PDF question (should return "Unable to determine")
        {
            "question": "What is written on page 3 of the attached PDF?",
            "expected": "Unable to determine without access to PDF files",
        },
        # Another YouTube question with shortened URL
        {
            "question": "In the YouTube video at https://youtu.be/dQw4w9WgXcQ, what is the main theme?",
            "expected": "Should extract transcript from Rick Astley video",
        },
    ]

    # Substrings (matched against the lower-cased question) that select the
    # category of check applied to the answer.
    youtube_markers = ("youtube", "youtu.be")
    file_markers = ("image", "excel", "audio", "pdf", "attached")
    # Phrase looked for in the answer to detect that the agent declined.
    unable_marker = "Unable to determine"

    # Test each question
    for i, test_case in enumerate(test_questions, 1):
        question = test_case["question"]
        expected = test_case["expected"]
        print(f"\nTest {i}: {question[:80]}...")
        print(f"Expected behavior: {expected}")

        try:
            # Get the answer
            answer = agent.run(question)
            print(f"Answer: {answer}")

            # Lower-case once instead of once per keyword probe.
            question_lower = question.lower()

            # Check if multimedia was handled appropriately. The YouTube
            # check comes first, so a YouTube question is never treated as
            # an attached-file question.
            if any(marker in question_lower for marker in youtube_markers):
                if unable_marker in answer:
                    print("❌ Failed to extract YouTube transcript")
                else:
                    print("✅ Successfully handled YouTube content")
            elif any(marker in question_lower for marker in file_markers):
                if unable_marker in answer:
                    print("✅ Correctly returned 'Unable to determine' for inaccessible file")
                else:
                    print("❌ Should have returned 'Unable to determine'")
        except Exception as e:
            # Best-effort boundary: report the failure and continue with the
            # next question rather than aborting the whole run.
            print(f"❌ Error: {type(e).__name__}: {e}")

        print("-" * 80)

    print("\n" + "=" * 80)
    print("Multimedia handling test complete!")
    print("=" * 80)
if __name__ == "__main__":
    # Check for API key: abort before running any test when it is missing.
    if not os.getenv("ANTHROPIC_API_KEY"):
        print("Error: ANTHROPIC_API_KEY not found in environment variables")
        print("Please set it in your .env file")
        # Raise SystemExit directly rather than calling the `exit` helper,
        # which is injected by the `site` module for interactive sessions
        # and is not guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(1)
    test_multimedia_questions()