Spaces:

shekkari21
/

NBA_Analysis

Sleeping

App Files Files Community

NBA_Analysis / tasks.py

shekkari21

Add NBA analysis project files

ddabbe4 about 2 months ago

raw

history blame contribute delete

7.7 kB

	"""
	Task definitions for NBA data analysis workflow.
	"""
	from crewai import Task
	from config import NBA_DATA_PATH


	def create_data_engineering_task(engineer_agent, csv_path: str = None) -> Task:
	    """
	    Build the quick dataset-readiness task handed to the Engineer Agent.

	    Args:
	        engineer_agent: Agent the returned task is assigned to.
	        csv_path: Location of the CSV file. Any falsy value (None or an
	            empty string) falls back to NBA_DATA_PATH from config.

	    Returns:
	        Task: Task whose description references the resolved dataset path
	        and asks for a short readiness confirmation.
	    """
	    # Truthiness fallback (not an `is None` test), so "" also uses the default.
	    dataset_path = csv_path if csv_path else NBA_DATA_PATH

	    # Prompt text is part of runtime behavior; keep it verbatim.
	    instructions = f"""
	Quickly examine the dataset located at {dataset_path}.

	Your tasks (BE EFFICIENT - use tools only once):
	1. Get a brief summary of the dataset structure (use get_nba_data_summary ONCE)
	2. Note the key columns available
	3. Verify the data is ready for analysis

	IMPORTANT:
	- Use get_nba_data_summary ONCE only - it provides all needed info
	- Do NOT call read_nba_data or analyze_nba_data multiple times
	- Keep your report concise (2-3 sentences)
	- The data is already clean and ready for analysis

	Provide a brief confirmation that the dataset is loaded and ready for analysis.
	"""
	    expected = "A brief confirmation (2-3 sentences) that the dataset is loaded and ready for analysis, including key column names."

	    return Task(
	        description=instructions,
	        agent=engineer_agent,
	        expected_output=expected,
	    )


	def create_data_analysis_task(analyst_agent, data_engineering_task: Task) -> Task:
	    """
	    Create the data analysis task for extracting insights from NBA data.

	    The prompt is a fixed template: it does not depend on any argument and
	    always refers to the NBA 2024-25 dataset.

	    Args:
	        analyst_agent: The Analyst Agent to assign this task to.
	        data_engineering_task: The data engineering task, passed to the new
	            task as its only context entry.

	    Returns:
	        Task: Configured data analysis task.
	    """
	    # Plain (non-f) string: the text has no placeholders, so an f-prefix would
	    # only risk accidental interpolation if literal braces were ever added.
	    description = """
	Using the cleaned NBA 2024-25 dataset, perform comprehensive analysis:

	Your tasks:
	1. Analyze player performance metrics:
	- Top performers by points, assists, rebounds
	- Shooting efficiency analysis (FG%, 3P%, FT%)
	- Player efficiency ratings
	2. Team performance analysis:
	- Win/loss records by team
	- Team offensive and defensive statistics
	- Team performance trends
	3. Game insights:
	- High-scoring games
	- Close games vs blowouts
	- Performance by date/period
	4. Identify key patterns and trends:
	- Best performing players
	- Most efficient teams
	- Statistical outliers
	5. Provide actionable insights and recommendations

	Create a comprehensive analysis report with key findings and insights.
	"""
	    return Task(
	        description=description,
	        agent=analyst_agent,
	        expected_output="A detailed analysis report with key insights, statistical findings, top performers, team analysis, and actionable recommendations based on the NBA 2024-25 data.",
	        context=[data_engineering_task],
	    )


	def create_custom_analysis_task(analyst_agent, user_query: str, data_engineering_task: Task = None, csv_path: str = None) -> Task:
	    """
	    Build an analysis task around a free-form user request.

	    Args:
	        analyst_agent: Agent the returned task is assigned to.
	        user_query: The user's request; inserted verbatim into the prompt.
	        data_engineering_task: Optional upstream task. When truthy it becomes
	            the single context entry; otherwise the context list is empty.
	        csv_path: Location of the CSV file, used only as a reference inside
	            the prompt. Falsy values fall back to NBA_DATA_PATH from config.

	    Returns:
	        Task: Task carrying the user query plus tool-usage instructions.
	    """
	    # Truthiness check preserved: a missing upstream task yields an empty context list.
	    if data_engineering_task:
	        upstream_context = [data_engineering_task]
	    else:
	        upstream_context = []

	    dataset_path = csv_path if csv_path else NBA_DATA_PATH

	    # Prompt text is part of runtime behavior; keep it verbatim.
	    prompt = f"""
	Using the dataset located at {dataset_path}, perform the following analysis as requested by the user:

	{user_query}

	IMPORTANT INSTRUCTIONS:
	1. For queries requiring aggregations (sum, count, average, top N, etc.), you MUST use the 'analyze_nba_data' tool.
	2. The 'analyze_nba_data' tool allows you to execute pandas code for grouping, aggregating, sorting, and filtering.
	3. Examples of when to use 'analyze_nba_data':
	- Finding top players by statistics (e.g., "top 5 three-point shooters")
	- Calculating totals or averages per player/team
	- Grouping and aggregating data
	- Statistical analysis requiring groupby operations
	4. Use 'semantic_search_nba_data' only for finding specific game records or examples, NOT for aggregations.
	5. Plan your analysis: First understand what data you need, then use the appropriate tool to get aggregated results.

	Steps to follow:
	1. If the query asks for "top N" or aggregations, use analyze_nba_data with pandas groupby operations
	2. For "top 5 three-point shooters": group by Player, sum the '3P' column, sort descending, take top 5
	3. Present the results clearly with player names and their statistics

	Provide a clear, comprehensive answer with relevant statistics, insights, and any supporting data from the dataset.
	"""
	    expected = "A detailed analysis report addressing the user's query with relevant insights, statistics, and findings from the data."

	    return Task(
	        description=prompt,
	        agent=analyst_agent,
	        expected_output=expected,
	        context=upstream_context,
	    )


	def create_storyteller_task(storyteller_agent, analysis_task: Task) -> Task:
	    """
	    Build the task that turns analysis results into headlines and storylines.

	    Args:
	        storyteller_agent: Agent the returned task is assigned to.
	        analysis_task: Analysis task supplied as the single context entry of
	            the new task.

	    Returns:
	        Task: Task with a fixed prompt asking for 3-5 headlines and a
	        storyline for each, in a HEADLINES / STORYLINES layout.
	    """
	    # Fixed prompt with no placeholders; keep the text verbatim.
	    brief = """
	Review the data analysis results and create engaging headlines and compelling storylines that bring the data to life.

	Your tasks:
	1. Read and understand the analysis results thoroughly
	2. Identify the most important and interesting findings
	3. Create 3-5 compelling headlines that:
	- Are catchy and attention-grabbing
	- Accurately reflect the key insights
	- Use engaging sports journalism language
	- Are suitable for display to users

	4. Write engaging storylines/content for each headline that:
	- Tells a story about the findings
	- Provides context and narrative around the statistics
	- Makes the data come alive with compelling prose
	- Explains why these insights matter
	- Uses vivid language and storytelling techniques
	- Is 2-3 paragraphs per storyline (enough to be engaging but concise)

	5. Format your output as follows:
	HEADLINES:
	[List of 3-5 headlines, one per line]

	STORYLINES:
	[For each headline, write 2-3 paragraphs of engaging content that tells the story behind the data]

	Make both the headlines and storylines exciting, memorable, and true to the data insights.
	Write like a sports journalist who knows how to make statistics compelling and human.
	"""
	    expected = "A formatted output with 3-5 engaging headlines followed by detailed storylines (2-3 paragraphs each) that bring the data analysis to life with compelling narrative and context."

	    return Task(
	        description=brief,
	        agent=storyteller_agent,
	        expected_output=expected,
	        context=[analysis_task],
	    )