|
|
|
|
|
""" |
|
|
Test multimedia handling for GAIA agent |
|
|
""" |
|
|
|
|
|
import os
import sys

from dotenv import load_dotenv

# Populate the environment from .env before the app module is imported.
# NOTE(review): presumably `app` reads configuration at import time, which is
# why this call sits between the imports -- confirm before reordering.
load_dotenv()

from app import LangGraphAgent  # noqa: E402
|
|
|
|
|
def test_multimedia_questions():
    """Run the GAIA agent on a fixed set of multimedia questions and print a report.

    Each question is passed to ``agent.run`` and the answer is printed. The
    answer is then judged with a substring heuristic:

    * questions mentioning YouTube pass unless the answer contains
      ``"Unable to determine"``;
    * questions mentioning an attached file (image, Excel, audio, PDF) pass
      only if the answer contains ``"Unable to determine"``.

    The ``"expected"`` text of each case is printed for the reader only; it is
    never compared against the answer. This is a smoke test: nothing is
    asserted and nothing is returned. An exception raised while handling one
    question is reported and the remaining questions still run.
    """
    print("Testing GAIA agent with multimedia questions...")
    print("=" * 80)

    agent = LangGraphAgent()

    test_questions = [
        {
            "question": 'In the video https://www.youtube.com/watch?v=1htKBjuUWec, Verma claims the existence of "a "moat" in the education system that provides a systemic advantage for those who know about it and can get into the pipeline." Verma\'s "moat" is a well-known advantage for students. What is the four-letter abbreviation used to describe this systemic advantage?',
            "expected": "Should extract transcript and find STEM",
        },
        {
            "question": "Look at the attached image and tell me what color is the car?",
            "expected": "Unable to determine without access to image files",
        },
        {
            "question": "What is the sum of all values in column B of the attached Excel file?",
            "expected": "Unable to determine without access to Excel files",
        },
        {
            "question": "What song is playing in the attached audio file?",
            "expected": "Unable to determine without access to audio files",
        },
        {
            "question": "What is written on page 3 of the attached PDF?",
            "expected": "Unable to determine without access to PDF files",
        },
        {
            "question": "In the YouTube video at https://youtu.be/dQw4w9WgXcQ, what is the main theme?",
            "expected": "Should extract transcript from Rick Astley video",
        },
    ]

    # Loop invariants: the phrase that signals "no access", and the words that
    # mark a question as depending on an attached file.
    unable_marker = "Unable to determine"
    file_keywords = ("image", "excel", "audio", "pdf", "attached")

    for i, test_case in enumerate(test_questions, 1):
        question = test_case["question"]
        expected = test_case["expected"]
        # Lower-case once; every keyword check below is case-insensitive.
        question_lower = question.lower()

        print(f"\nTest {i}: {question[:80]}...")
        print(f"Expected behavior: {expected}")

        try:
            answer = agent.run(question)
            print(f"Answer: {answer}")

            # The YouTube check comes first so a video question is never
            # judged by the attached-file rule.
            if "youtube" in question_lower or "youtu.be" in question_lower:
                if unable_marker in answer:
                    print("❌ Failed to extract YouTube transcript")
                else:
                    print("✅ Successfully handled YouTube content")
            elif any(keyword in question_lower for keyword in file_keywords):
                if unable_marker in answer:
                    print("✅ Correctly returned 'Unable to determine' for inaccessible file")
                else:
                    print("❌ Should have returned 'Unable to determine'")
        except Exception as e:
            # Deliberately broad: one failing question must not abort the run.
            # This also covers a non-string answer breaking the `in` checks.
            print(f"❌ Error: {type(e).__name__}: {e}")

        print("-" * 80)

    print("\n" + "=" * 80)
    print("Multimedia handling test complete!")
    print("=" * 80)
|
|
|
|
|
if __name__ == "__main__":
    # Refuse to start without an API key so the failure is one clear message.
    # NOTE(review): presumably the agent needs ANTHROPIC_API_KEY -- confirm.
    if not os.getenv("ANTHROPIC_API_KEY"):
        print("Error: ANTHROPIC_API_KEY not found in environment variables", file=sys.stderr)
        print("Please set it in your .env file", file=sys.stderr)
        # sys.exit instead of the site-injected exit(): that helper is meant
        # for the interactive shell and is missing under `python -S`.
        sys.exit(1)

    test_multimedia_questions()