Spaces:

AbdullahIsaMarkus
/

Final_Assignment_Agent

Sleeping

Final_Assignment_Agent / test_multimedia.py

Markus Clauss DIRU Vetsuisse

First agent trial

1637cd5 6 months ago

3.97 kB

	#!/usr/bin/env python3
	"""
	Manual smoke test for multimedia handling in the GAIA agent.

	Sends sample questions that reference YouTube videos and attached files
	to ``LangGraphAgent`` and prints a pass/fail verdict for each answer.
	Running it as a script requires ``ANTHROPIC_API_KEY`` in the environment.
	"""

	import os
	from dotenv import load_dotenv

	# Load environment variables (e.g. from the project's .env file). This runs
	# before the agent import below -- presumably so that `app` can read them at
	# import time (TODO confirm).
	load_dotenv()

	# Import the agent under test
	from app import LangGraphAgent

	def _multimedia_verdict(question, answer):
	    """Classify one agent answer for a multimedia question.

	    Args:
	        question: The question text that was sent to the agent.
	        answer: Whatever the agent returned for that question.

	    Returns:
	        The verdict line to print, or ``None`` when the question mentions
	        neither a YouTube link nor an attached-file keyword.
	    """
	    lowered = question.lower()
	    is_video = "youtube" in lowered or "youtu.be" in lowered
	    is_attachment = any(
	        keyword in lowered
	        for keyword in ("image", "excel", "audio", "pdf", "attached")
	    )
	    if not (is_video or is_attachment):
	        return None

	    # A None/empty/non-string answer cannot be substring-checked. Report it
	    # as a failure instead of letting `in` raise TypeError, or letting an
	    # empty string count as a successfully handled video.
	    if not isinstance(answer, str) or not answer.strip():
	        return "❌ Agent returned no text answer"

	    declined = "Unable to determine" in answer
	    if is_video:
	        # Video questions are checked first: for them a refusal is a failure.
	        if declined:
	            return "❌ Failed to extract YouTube transcript"
	        return "✅ Successfully handled YouTube content"

	    # Attached-file questions: the agent has no file, so a refusal is correct.
	    if declined:
	        return "✅ Correctly returned 'Unable to determine' for inaccessible file"
	    return "❌ Should have returned 'Unable to determine'"


	def test_multimedia_questions():
	    """Run sample multimedia questions through the agent and print a verdict for each.

	    Builds a ``LangGraphAgent``, sends it a fixed list of questions (two
	    YouTube links and four attached-file questions) and prints the answer
	    together with a pass/fail line. An exception raised by ``agent.run`` is
	    reported for that question and the loop continues with the next one.

	    The function makes no assertions and returns ``None``; it is a manual
	    smoke test whose result is read from the printed output.
	    """
	    print("Testing GAIA agent with multimedia questions...")
	    print("=" * 80)

	    # Initialize agent
	    agent = LangGraphAgent()

	    # Test questions from the GAIA benchmark that involve multimedia
	    test_questions = [
	        # YouTube video question
	        {
	            "question": 'In the video https://www.youtube.com/watch?v=1htKBjuUWec, Verma claims the existence of "a "moat" in the education system that provides a systemic advantage for those who know about it and can get into the pipeline." Verma\'s "moat" is a well-known advantage for students. What is the four-letter abbreviation used to describe this systemic advantage?',
	            "expected": "Should extract transcript and find STEM",
	        },
	        # Image question (should return "Unable to determine")
	        {
	            "question": "Look at the attached image and tell me what color is the car?",
	            "expected": "Unable to determine without access to image files",
	        },
	        # Excel file question (should return "Unable to determine")
	        {
	            "question": "What is the sum of all values in column B of the attached Excel file?",
	            "expected": "Unable to determine without access to Excel files",
	        },
	        # Audio question (should return "Unable to determine")
	        {
	            "question": "What song is playing in the attached audio file?",
	            "expected": "Unable to determine without access to audio files",
	        },
	        # PDF question (should return "Unable to determine")
	        {
	            "question": "What is written on page 3 of the attached PDF?",
	            "expected": "Unable to determine without access to PDF files",
	        },
	        # Another YouTube question with shortened URL
	        {
	            "question": "In the YouTube video at https://youtu.be/dQw4w9WgXcQ, what is the main theme?",
	            "expected": "Should extract transcript from Rick Astley video",
	        },
	    ]

	    # Test each question
	    for i, test_case in enumerate(test_questions, 1):
	        question = test_case["question"]
	        expected = test_case["expected"]

	        print(f"\nTest {i}: {question[:80]}...")
	        print(f"Expected behavior: {expected}")

	        try:
	            # Only the agent call is guarded; the verdict logic cannot raise.
	            answer = agent.run(question)
	        except Exception as e:
	            # Smoke-test boundary: report the failure and move on so one
	            # bad question does not hide the results of the others.
	            print(f"❌ Error: {type(e).__name__}: {e}")
	        else:
	            print(f"Answer: {answer}")

	            # Check if multimedia was handled appropriately
	            verdict = _multimedia_verdict(question, answer)
	            if verdict is not None:
	                print(verdict)

	        print("-" * 80)

	    print("\n" + "=" * 80)
	    print("Multimedia handling test complete!")
	    print("=" * 80)

	if __name__ == "__main__":
	    # Refuse to start when ANTHROPIC_API_KEY is missing or empty.
	    if not os.getenv("ANTHROPIC_API_KEY"):
	        print("Error: ANTHROPIC_API_KEY not found in environment variables")
	        print("Please set it in your .env file")
	        # `exit` is a convenience injected by the `site` module and can be
	        # missing (e.g. `python -S`); raising SystemExit always works and
	        # yields the same exit status of 1.
	        raise SystemExit(1)

	    test_multimedia_questions()