NBA_Analysis / tasks.py
shekkari21's picture
Add NBA analysis project files
ddabbe4
"""
Task definitions for NBA data analysis workflow.
"""
from typing import Optional

from crewai import Task

from config import NBA_DATA_PATH
def create_data_engineering_task(engineer_agent, csv_path: Optional[str] = None) -> Task:
    """
    Create the data engineering task that inspects the dataset before analysis.

    The task asks the agent for a single, brief summary of the dataset and a
    short confirmation that it is ready for analysis; it does not request any
    cleaning or transformation.

    Args:
        engineer_agent: The Engineer Agent to assign this task to.
        csv_path: Path to the CSV file. Any falsy value (``None`` or an empty
            string) falls back to ``NBA_DATA_PATH`` from config.

    Returns:
        Task: Configured data engineering task.
    """
    data_path = csv_path or NBA_DATA_PATH
    # The prompt body is kept flush-left so the description string carries no
    # incidental source-code indentation.
    description = f"""
Quickly examine the dataset located at {data_path}.
Your tasks (BE EFFICIENT - use tools only once):
1. Get a brief summary of the dataset structure (use get_nba_data_summary ONCE)
2. Note the key columns available
3. Verify the data is ready for analysis
IMPORTANT:
- Use get_nba_data_summary ONCE only - it provides all needed info
- Do NOT call read_nba_data or analyze_nba_data multiple times
- Keep your report concise (2-3 sentences)
- The data is already clean and ready for analysis
Provide a brief confirmation that the dataset is loaded and ready for analysis.
"""
    return Task(
        description=description,
        agent=engineer_agent,
        expected_output="A brief confirmation (2-3 sentences) that the dataset is loaded and ready for analysis, including key column names.",
    )
def create_data_analysis_task(analyst_agent, data_engineering_task: Task) -> Task:
    """
    Create the data analysis task for extracting insights from NBA data.

    The prompt is fixed text: it refers to the "NBA 2024-25 dataset" by name
    and does not embed a file path.

    Args:
        analyst_agent: The Analyst Agent to assign this task to.
        data_engineering_task: The data engineering task, passed as the sole
            entry of this task's ``context`` list.

    Returns:
        Task: Configured data analysis task.
    """
    # Plain (non-f) string: the prompt has no placeholders to interpolate.
    # Kept flush-left so the description carries no source-code indentation.
    description = """
Using the cleaned NBA 2024-25 dataset, perform comprehensive analysis:
Your tasks:
1. Analyze player performance metrics:
- Top performers by points, assists, rebounds
- Shooting efficiency analysis (FG%, 3P%, FT%)
- Player efficiency ratings
2. Team performance analysis:
- Win/loss records by team
- Team offensive and defensive statistics
- Team performance trends
3. Game insights:
- High-scoring games
- Close games vs blowouts
- Performance by date/period
4. Identify key patterns and trends:
- Best performing players
- Most efficient teams
- Statistical outliers
5. Provide actionable insights and recommendations
Create a comprehensive analysis report with key findings and insights.
"""
    return Task(
        description=description,
        agent=analyst_agent,
        expected_output="A detailed analysis report with key insights, statistical findings, top performers, team analysis, and actionable recommendations based on the NBA 2024-25 data.",
        context=[data_engineering_task],
    )
def create_custom_analysis_task(
    analyst_agent,
    user_query: str,
    data_engineering_task: Optional[Task] = None,
    csv_path: Optional[str] = None,
) -> Task:
    """
    Create a custom data analysis task based on user input.

    Args:
        analyst_agent: The Analyst Agent to assign this task to.
        user_query: The user's custom analysis query/task. It is inserted
            verbatim into the task description.
        data_engineering_task: The data engineering task to use as context.
            When ``None`` (e.g. for parallel execution), an empty context
            list is passed instead.
        csv_path: Path to the CSV file, used only for reference in the
            description. Any falsy value falls back to ``NBA_DATA_PATH``.

    Returns:
        Task: Configured custom analysis task.
    """
    data_path = csv_path or NBA_DATA_PATH
    # Explicit identity check: only the absence of a task (None) should yield
    # an empty context, independent of how the task object evaluates as a bool.
    context = [data_engineering_task] if data_engineering_task is not None else []
    # The prompt body is kept flush-left so the description string carries no
    # incidental source-code indentation.
    description = f"""
Using the dataset located at {data_path}, perform the following analysis as requested by the user:
{user_query}
IMPORTANT INSTRUCTIONS:
1. For queries requiring aggregations (sum, count, average, top N, etc.), you MUST use the 'analyze_nba_data' tool.
2. The 'analyze_nba_data' tool allows you to execute pandas code for grouping, aggregating, sorting, and filtering.
3. Examples of when to use 'analyze_nba_data':
- Finding top players by statistics (e.g., "top 5 three-point shooters")
- Calculating totals or averages per player/team
- Grouping and aggregating data
- Statistical analysis requiring groupby operations
4. Use 'semantic_search_nba_data' only for finding specific game records or examples, NOT for aggregations.
5. Plan your analysis: First understand what data you need, then use the appropriate tool to get aggregated results.
Steps to follow:
1. If the query asks for "top N" or aggregations, use analyze_nba_data with pandas groupby operations
2. For "top 5 three-point shooters": group by Player, sum the '3P' column, sort descending, take top 5
3. Present the results clearly with player names and their statistics
Provide a clear, comprehensive answer with relevant statistics, insights, and any supporting data from the dataset.
"""
    return Task(
        description=description,
        agent=analyst_agent,
        expected_output="A detailed analysis report addressing the user's query with relevant insights, statistics, and findings from the data.",
        context=context,
    )
def create_storyteller_task(storyteller_agent, analysis_task: Task) -> Task:
    """
    Build the task that turns analysis results into headlines and storylines.

    Args:
        storyteller_agent: The Storyteller Agent that will own this task.
        analysis_task: The analysis task supplied as this task's only
            context entry; its results are what the prompt asks the agent
            to review.

    Returns:
        Task: Configured storyteller task.
    """
    # Prompt text stays flush-left so the literal holds no source indentation.
    storyteller_brief = """
Review the data analysis results and create engaging headlines and compelling storylines that bring the data to life.
Your tasks:
1. Read and understand the analysis results thoroughly
2. Identify the most important and interesting findings
3. Create 3-5 compelling headlines that:
- Are catchy and attention-grabbing
- Accurately reflect the key insights
- Use engaging sports journalism language
- Are suitable for display to users
4. Write engaging storylines/content for each headline that:
- Tells a story about the findings
- Provides context and narrative around the statistics
- Makes the data come alive with compelling prose
- Explains why these insights matter
- Uses vivid language and storytelling techniques
- Is 2-3 paragraphs per storyline (enough to be engaging but concise)
5. Format your output as follows:
HEADLINES:
[List of 3-5 headlines, one per line]
STORYLINES:
[For each headline, write 2-3 paragraphs of engaging content that tells the story behind the data]
Make both the headlines and storylines exciting, memorable, and true to the data insights.
Write like a sports journalist who knows how to make statistics compelling and human.
"""
    deliverable = "A formatted output with 3-5 engaging headlines followed by detailed storylines (2-3 paragraphs each) that bring the data analysis to life with compelling narrative and context."
    upstream_tasks = [analysis_task]
    return Task(
        description=storyteller_brief,
        agent=storyteller_agent,
        expected_output=deliverable,
        context=upstream_tasks,
    )