File size: 3,969 Bytes
1637cd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
"""
Test multimedia handling for GAIA agent
"""

import os
from dotenv import load_dotenv

# Load environment variables (e.g. from a local .env file) into os.environ.
# NOTE(review): this call runs before the `app` import below; presumably `app`
# reads configuration at import time -- confirm before reordering these lines.
load_dotenv()

# Import the agent under test
from app import LangGraphAgent

# Phrase the agent is expected to use when it cannot access a resource.
# Stored lower-case because answers are matched case-insensitively.
_UNABLE_MARKER = "unable to determine"

# Words that mark a question as depending on an attached, inaccessible file.
_ATTACHMENT_KEYWORDS = ("image", "excel", "audio", "pdf", "attached")

# Maximum number of question characters echoed in the per-test header line.
_PREVIEW_WIDTH = 80


def _multimedia_verdict(question, answer):
    """Return the status line for one question/answer pair.

    Args:
        question: The question text that was sent to the agent.
        answer: Whatever the agent returned. ``None`` and non-string values
            are tolerated and converted with ``str()`` before matching.

    Returns:
        A human-readable status line, or ``None`` when the question mentions
        neither a YouTube link nor an attachment keyword.
    """
    question_lc = question.lower()
    answer_text = "" if answer is None else str(answer)
    declined = _UNABLE_MARKER in answer_text.lower()

    if "youtube" in question_lc or "youtu.be" in question_lc:
        # An empty answer means nothing was extracted, so it counts as a
        # failure just like an explicit "Unable to determine".
        if declined or not answer_text.strip():
            return "❌ Failed to extract YouTube transcript"
        return "✅ Successfully handled YouTube content"

    if any(keyword in question_lc for keyword in _ATTACHMENT_KEYWORDS):
        if declined:
            return "✅ Correctly returned 'Unable to determine' for inaccessible file"
        return "❌ Should have returned 'Unable to determine'"

    return None


def test_multimedia_questions():
    """Run the agent on multimedia questions and print a per-question report.

    A ``LangGraphAgent`` is created and its ``run`` method is called once per
    question. YouTube questions are expected to produce a real answer;
    questions about attached files (image, Excel, audio, PDF) are expected to
    produce an "Unable to determine" answer.

    The function only prints its findings; it makes no assertions and returns
    ``None``. An exception raised for one question is reported and the
    remaining questions still run.
    """
    print("Testing GAIA agent with multimedia questions...")
    print("=" * 80)

    # Initialize agent
    agent = LangGraphAgent()

    # Test questions from the GAIA benchmark that involve multimedia
    test_questions = [
        # YouTube video question
        {
            "question": 'In the video https://www.youtube.com/watch?v=1htKBjuUWec, Verma claims the existence of "a "moat" in the education system that provides a systemic advantage for those who know about it and can get into the pipeline." Verma\'s "moat" is a well-known advantage for students. What is the four-letter abbreviation used to describe this systemic advantage?',
            "expected": "Should extract transcript and find STEM",
        },
        # Image question (should return "Unable to determine")
        {
            "question": "Look at the attached image and tell me what color is the car?",
            "expected": "Unable to determine without access to image files",
        },
        # Excel file question (should return "Unable to determine")
        {
            "question": "What is the sum of all values in column B of the attached Excel file?",
            "expected": "Unable to determine without access to Excel files",
        },
        # Audio question (should return "Unable to determine")
        {
            "question": "What song is playing in the attached audio file?",
            "expected": "Unable to determine without access to audio files",
        },
        # PDF question (should return "Unable to determine")
        {
            "question": "What is written on page 3 of the attached PDF?",
            "expected": "Unable to determine without access to PDF files",
        },
        # Another YouTube question with shortened URL
        {
            "question": "In the YouTube video at https://youtu.be/dQw4w9WgXcQ, what is the main theme?",
            "expected": "Should extract transcript from Rick Astley video",
        },
    ]

    # Test each question
    for i, test_case in enumerate(test_questions, 1):
        question = test_case["question"]
        expected = test_case["expected"]

        # Add the ellipsis only when the question was actually cut short.
        if len(question) > _PREVIEW_WIDTH:
            preview = question[:_PREVIEW_WIDTH] + "..."
        else:
            preview = question

        print(f"\nTest {i}: {preview}")
        print(f"Expected behavior: {expected}")

        try:
            answer = agent.run(question)
        except Exception as e:
            # Deliberately broad: one failing question must not abort the
            # rest of the report.
            print(f"❌ Error: {type(e).__name__}: {e}")
        else:
            print(f"Answer: {answer}")

            # Check if multimedia was handled appropriately
            verdict = _multimedia_verdict(question, answer)
            if verdict is not None:
                print(verdict)

        print("-" * 80)

    print("\n" + "=" * 80)
    print("Multimedia handling test complete!")
    print("=" * 80)

if __name__ == "__main__":
    # Refuse to start without an API key so the problem is reported up front
    # with a clear message.
    if not os.getenv("ANTHROPIC_API_KEY"):
        print("Error: ANTHROPIC_API_KEY not found in environment variables")
        print("Please set it in your .env file")
        # `exit()` is a convenience injected by the `site` module and can be
        # missing (e.g. `python -S`, frozen builds); SystemExit always exists.
        raise SystemExit(1)

    test_multimedia_questions()