File size: 30,127 Bytes
6a42990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
"""
Provides mechanisms for understanding the characteristics of agent populations, such as
their age distribution, typical interests, and so on.

Guideline for the plotting methods: all plot methods should also return a Pandas dataframe with the data used for
plotting.
"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from typing import List, Dict, Any, Optional, Union, Callable
from collections import Counter, defaultdict
import warnings

# Handle TinyPerson import gracefully
try:
    from tinytroupe.agent import TinyPerson
except ImportError:
    # Fallback if import fails
    TinyPerson = None


class Profiler:
    """
    Advanced profiler for analyzing agent population characteristics with support for 
    complex attributes, statistical analysis, and comprehensive visualizations.
    """

    def __init__(self, attributes: Optional[List[str]] = None) -> None:
        """
        Create a profiler for the given agent attributes.

        Args:
            attributes: Names of the attributes to profile. Dot notation selects
                nested values (e.g. "occupation.title"). When omitted, defaults to
                ["age", "occupation.title", "nationality"].
        """
        # The default list is built per call: a list literal in the signature would
        # be one object shared (and mutable) across every Profiler instance.
        if attributes is None:
            attributes = ["age", "occupation.title", "nationality"]

        self.attributes = attributes
        self.attributes_distributions = {}  # attribute -> value counts
        self.agents_data = None  # processed agent dicts; None until profile() runs
        self.analysis_results = {}  # results of the advanced analyses
        self._custom_analyses = {}  # name -> callable, filled by add_custom_analysis()

        # Set up better plotting style
        plt.style.use('default')
        sns.set_palette("husl")

    def profile(self, agents: Union[List[dict], List[TinyPerson]], plot: bool = True, 
                advanced_analysis: bool = True) -> Dict[str, Any]:   
        """
        Profiles the given agents with comprehensive analysis.

        Args:
            agents: The agents to be profiled (either dicts or TinyPerson objects)
            plot: Whether to generate visualizations
            advanced_analysis: Whether to perform advanced statistical analysis
        
        Returns:
            Dictionary containing all analysis results
        """
        # Convert agents to consistent format
        self.agents_data = self._prepare_agent_data(agents)
        
        # Basic attribute distributions
        self.attributes_distributions = self._compute_attributes_distributions(self.agents_data)
        
        if advanced_analysis:
            self._perform_advanced_analysis()
        
        if plot:
            self.render(advanced=advanced_analysis)
            
        return {
            'distributions': self.attributes_distributions,
            'analysis': self.analysis_results,
            'summary_stats': self._generate_summary_statistics()
        }

    def _prepare_agent_data(self, agents: Union[List[dict], List[TinyPerson]]) -> List[Dict[str, Any]]:
        """Convert agents to a consistent dictionary format for analysis."""
        processed_agents = []
        
        for agent in agents:
            if isinstance(agent, TinyPerson):
                # Extract data from TinyPerson object
                agent_data = self._extract_tinyperson_data(agent)
            else:
                agent_data = agent.copy()
            
            processed_agents.append(agent_data)
        
        return processed_agents

    def _extract_tinyperson_data(self, agent: TinyPerson) -> Dict[str, Any]:
        """Extract comprehensive data from a TinyPerson object."""
        data = {}
        
        # Basic persona attributes
        if hasattr(agent, '_persona') and agent._persona:
            data.update(agent._persona)
        
        # Mental state information
        if hasattr(agent, '_mental_state') and agent._mental_state:
            mental_state = agent._mental_state
            data['current_emotions'] = mental_state.get('emotions')
            data['current_goals'] = mental_state.get('goals', [])
            data['current_context'] = mental_state.get('context', [])
            data['accessible_agents_count'] = len(mental_state.get('accessible_agents', []))
        
        # Behavioral metrics
        if hasattr(agent, 'actions_count'):
            data['actions_count'] = agent.actions_count
        if hasattr(agent, 'stimuli_count'):
            data['stimuli_count'] = agent.stimuli_count
            
        # Memory statistics
        if hasattr(agent, 'episodic_memory') and agent.episodic_memory:
            try:
                # Get total memory size including both committed memory and current episode buffer
                memory_size = len(agent.episodic_memory.memory) + len(agent.episodic_memory.episodic_buffer)
                data['episodic_memory_size'] = memory_size
            except AttributeError:
                # Fallback if memory structure is different
                data['episodic_memory_size'] = 0
        
        # Social connections
        if hasattr(agent, '_accessible_agents'):
            data['social_connections'] = len(agent._accessible_agents)
        
        return data

    def _perform_advanced_analysis(self):
        """Perform advanced statistical and behavioral analysis."""
        self.analysis_results = {}
        
        # Demographic analysis
        self.analysis_results['demographics'] = self._analyze_demographics()
        
        # Behavioral patterns
        self.analysis_results['behavioral_patterns'] = self._analyze_behavioral_patterns()
        
        # Social network analysis
        self.analysis_results['social_analysis'] = self._analyze_social_patterns()
        
        # Personality clustering
        self.analysis_results['personality_clusters'] = self._analyze_personality_clusters()
        
        # Correlations
        self.analysis_results['correlations'] = self._analyze_correlations()

    def _analyze_demographics(self) -> Dict[str, Any]:
        """Analyze demographic patterns in the population."""
        demographics = {}
        
        # Age analysis
        ages = [agent.get('age') for agent in self.agents_data if agent.get('age') is not None]
        if ages:
            demographics['age_stats'] = {
                'mean': np.mean(ages),
                'median': np.median(ages),
                'std': np.std(ages),
                'range': (min(ages), max(ages)),
                'distribution': 'normal' if self._test_normality(ages) else 'non-normal'
            }
        
        # Occupation diversity
        occupations = [agent.get('occupation', {}).get('title') if isinstance(agent.get('occupation'), dict) 
                      else agent.get('occupation') for agent in self.agents_data]
        occupations = [occ for occ in occupations if occ is not None]
        
        if occupations:
            occ_counts = Counter(occupations)
            demographics['occupation_diversity'] = {
                'unique_count': len(occ_counts),
                'diversity_index': self._calculate_diversity_index(occ_counts),
                'most_common': occ_counts.most_common(5)
            }
        
        # Geographic distribution
        nationalities = [agent.get('nationality') for agent in self.agents_data if agent.get('nationality')]
        if nationalities:
            nat_counts = Counter(nationalities)
            demographics['geographic_diversity'] = {
                'unique_countries': len(nat_counts),
                'diversity_index': self._calculate_diversity_index(nat_counts),
                'distribution': dict(nat_counts)
            }
        
        return demographics

    def _analyze_behavioral_patterns(self) -> Dict[str, Any]:
        """Analyze behavioral patterns across the population."""
        behavioral = {}
        
        # Activity levels
        actions_data = [agent.get('actions_count', 0) for agent in self.agents_data]
        stimuli_data = [agent.get('stimuli_count', 0) for agent in self.agents_data]
        
        if any(actions_data):
            behavioral['activity_levels'] = {
                'actions_mean': np.mean(actions_data),
                'actions_std': np.std(actions_data),
                'stimuli_mean': np.mean(stimuli_data),
                'stimuli_std': np.std(stimuli_data),
                'activity_ratio': np.mean(actions_data) / max(np.mean(stimuli_data), 1)
            }
        
        # Goal patterns
        all_goals = []
        for agent in self.agents_data:
            goals = agent.get('current_goals', [])
            if isinstance(goals, list):
                all_goals.extend(goals)
        
        if all_goals:
            goal_counts = Counter(all_goals)
            behavioral['goal_patterns'] = {
                'common_goals': goal_counts.most_common(10),
                'goal_diversity': self._calculate_diversity_index(goal_counts)
            }
        
        return behavioral

    def _analyze_social_patterns(self) -> Dict[str, Any]:
        """Analyze social connection patterns."""
        social = {}
        
        # Social connectivity
        connections = [agent.get('social_connections', 0) for agent in self.agents_data]
        accessible_counts = [agent.get('accessible_agents_count', 0) for agent in self.agents_data]
        
        if any(connections + accessible_counts):
            social['connectivity'] = {
                'avg_connections': np.mean(connections),
                'avg_accessible': np.mean(accessible_counts),
                'connectivity_distribution': self._categorize_connectivity(connections),
                'social_isolation_rate': sum(1 for c in connections if c == 0) / len(connections)
            }
        
        return social

    def _analyze_personality_clusters(self) -> Dict[str, Any]:
        """Identify personality-based clusters if Big Five data is available."""
        personality = {}
        
        # Extract Big Five traits if available
        big_five_data = []
        for agent in self.agents_data:
            if 'big_five' in agent and isinstance(agent['big_five'], dict):
                traits = agent['big_five']
                # Convert text descriptions to numerical values (simplified approach)
                numerical_traits = {}
                for trait, value in traits.items():
                    if isinstance(value, str):
                        if 'high' in value.lower():
                            numerical_traits[trait] = 0.8
                        elif 'medium' in value.lower():
                            numerical_traits[trait] = 0.5
                        elif 'low' in value.lower():
                            numerical_traits[trait] = 0.2
                        else:
                            numerical_traits[trait] = 0.5  # Default
                    else:
                        numerical_traits[trait] = value
                
                if len(numerical_traits) == 5:  # Full Big Five
                    big_five_data.append(numerical_traits)
        
        if len(big_five_data) >= 2:  # Need minimum agents for analysis (reduced from >3 to >=2)
            df_traits = pd.DataFrame(big_five_data)
            
            # Simple clustering based on dominant traits
            personality['trait_analysis'] = {
                'average_traits': df_traits.mean().to_dict(),
                'trait_correlations': df_traits.corr().to_dict() if len(big_five_data) > 1 else {},
                'dominant_traits': self._identify_dominant_traits(df_traits)
            }
        
        return personality

    def _analyze_correlations(self) -> Dict[str, Any]:
        """Analyze correlations between different attributes."""
        correlations = {}
        
        # Create a numerical dataset for correlation analysis
        numerical_data = {}
        
        for agent in self.agents_data:
            for attr in ['age', 'actions_count', 'stimuli_count', 'social_connections']:
                if attr not in numerical_data:
                    numerical_data[attr] = []
                numerical_data[attr].append(agent.get(attr, 0))
        
        if len(numerical_data) > 1:
            df_corr = pd.DataFrame(numerical_data)
            correlation_matrix = df_corr.corr()
            
            # Find strong correlations (> 0.5)
            strong_correlations = []
            for i in range(len(correlation_matrix.columns)):
                for j in range(i+1, len(correlation_matrix.columns)):
                    corr_value = correlation_matrix.iloc[i, j]
                    if abs(corr_value) > 0.5:
                        strong_correlations.append({
                            'variables': (correlation_matrix.columns[i], correlation_matrix.columns[j]),
                            'correlation': corr_value
                        })
            
            correlations['numerical_correlations'] = strong_correlations
            correlations['correlation_matrix'] = correlation_matrix.to_dict()
        
        return correlations

    def render(self, advanced: bool = True) -> None:
        """
        Renders comprehensive visualizations of the agent population analysis.
        """
        # Basic attribute distributions
        self._plot_basic_distributions()
        
        if advanced and self.analysis_results:
            self._plot_advanced_analysis()

    def _plot_basic_distributions(self) -> None:
        """
        Plot one bar chart per profiled attribute in a grid of up to 3 columns.

        Attributes with at most 15 distinct values get a vertical bar chart;
        larger ones show their first 15 entries as horizontal bars. An attribute
        with no recorded values gets a "No data" placeholder instead of a chart.
        """
        n_attrs = len(self.attributes)
        if n_attrs == 0:
            return

        # Calculate subplot layout
        n_cols = min(3, n_attrs)
        n_rows = (n_attrs + n_cols - 1) // n_cols

        # squeeze=False always yields a 2-D array of axes, so a single flatten
        # covers the one-plot, one-row and full-grid layouts alike.
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows), squeeze=False)
        axes = axes.flatten()

        for ax, attribute in zip(axes, self.attributes):
            label = attribute.replace('_', ' ').title()
            df = self.attributes_distributions.get(attribute)

            # Plotting an empty distribution raises ("no numeric data to plot"),
            # which used to abort the whole figure; show a placeholder instead.
            if df is None or len(df) == 0:
                ax.set_title(f"{label} Distribution", fontsize=12, fontweight='bold')
                ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
                ax.set_xticks([])
                ax.set_yticks([])
                continue

            if len(df) <= 15:  # Few categories: vertical bars, counts on the y axis
                df.plot(kind='bar', ax=ax, color=sns.color_palette("husl", len(df)))
                ax.set_title(f"{label} Distribution", fontsize=12, fontweight='bold')
                ax.tick_params(axis='x', rotation=45)
                ax.set_ylabel('Count')
                ax.grid(axis='y', alpha=0.3)
            else:  # Many categories: horizontal bars for readability, counts on the x axis
                df.head(15).plot(kind='barh', ax=ax, color=sns.color_palette("husl", 15))
                ax.set_title(f"Top 15 {label}", fontsize=12, fontweight='bold')
                ax.set_xlabel('Count')
                ax.grid(axis='x', alpha=0.3)

        # Hide the unused cells of the grid
        for ax in axes[n_attrs:]:
            ax.set_visible(False)

        plt.tight_layout()
        plt.show()

    def _plot_advanced_analysis(self) -> None:
        """Create advanced visualizations for the analysis results."""
        
        # 1. Demographics overview
        if 'demographics' in self.analysis_results:
            self._plot_demographics()
        
        # 2. Behavioral patterns
        if 'behavioral_patterns' in self.analysis_results:
            self._plot_behavioral_patterns()
        
        # 3. Correlation heatmap
        if 'correlations' in self.analysis_results and 'correlation_matrix' in self.analysis_results['correlations']:
            self._plot_correlation_heatmap()

    def _plot_demographics(self) -> None:
        """
        Plot the demographic analysis as a 2x2 figure.

        Panels: age histogram with the mean marked, pie chart of the most common
        occupations, horizontal bars for the ten most frequent nationalities, and
        a bar chart of the diversity indices. A panel stays empty when its data
        is absent from analysis_results['demographics'].
        """
        demo = self.analysis_results['demographics']

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle('Population Demographics Analysis', fontsize=16, fontweight='bold')

        # Age distribution
        if 'age_stats' in demo:
            ages = [agent.get('age') for agent in self.agents_data if agent.get('age') is not None]
            axes[0, 0].hist(ages, bins=10, alpha=0.7, color='skyblue', edgecolor='black')
            axes[0, 0].axvline(demo['age_stats']['mean'], color='red', linestyle='--',
                              label=f"Mean: {demo['age_stats']['mean']:.1f}")
            axes[0, 0].set_title('Age Distribution')
            axes[0, 0].set_xlabel('Age')
            axes[0, 0].set_ylabel('Count')
            axes[0, 0].legend()

        # Occupation diversity
        if 'occupation_diversity' in demo:
            occ_data = demo['occupation_diversity']['most_common']
            if occ_data:
                occs, counts = zip(*occ_data)
                axes[0, 1].pie(counts, labels=occs, autopct='%1.1f%%')
                axes[0, 1].set_title('Top Occupations')

        # Geographic distribution
        if 'geographic_diversity' in demo:
            geo_data = demo['geographic_diversity']['distribution']
            if geo_data:
                # Rank by count before truncating: the distribution dict is in
                # first-seen order, so slicing it directly would not give the
                # ten most frequent countries.
                top_countries = sorted(geo_data.items(), key=lambda item: item[1], reverse=True)[:10]
                countries, counts = zip(*top_countries)
                axes[1, 0].barh(countries, counts, color='lightcoral')
                axes[1, 0].invert_yaxis()  # most frequent country at the top
                axes[1, 0].set_title('Geographic Distribution')
                axes[1, 0].set_xlabel('Count')

        # Diversity metrics
        diversity_metrics = []
        diversity_values = []

        if 'occupation_diversity' in demo:
            diversity_metrics.append('Occupation\nDiversity')
            diversity_values.append(demo['occupation_diversity']['diversity_index'])

        if 'geographic_diversity' in demo:
            diversity_metrics.append('Geographic\nDiversity')
            diversity_values.append(demo['geographic_diversity']['diversity_index'])

        if diversity_metrics:
            axes[1, 1].bar(diversity_metrics, diversity_values, color='lightgreen')
            axes[1, 1].set_title('Diversity Indices')
            axes[1, 1].set_ylabel('Diversity Score')
            axes[1, 1].set_ylim(0, 1)

        plt.tight_layout()
        plt.show()

    def _plot_behavioral_patterns(self) -> None:
        """
        Plot the behavioral analysis as a two-panel figure.

        Left: scatter of stimuli count against actions count per agent, with a
        linear trend line when one can be fitted. Right: horizontal bars for up
        to eight of the most common goals. A panel stays empty when its data is
        absent from analysis_results['behavioral_patterns'].
        """
        behavioral = self.analysis_results['behavioral_patterns']

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        fig.suptitle('Behavioral Patterns Analysis', fontsize=16, fontweight='bold')

        # Activity levels scatter plot
        if 'activity_levels' in behavioral:
            actions_data = [agent.get('actions_count', 0) for agent in self.agents_data]
            stimuli_data = [agent.get('stimuli_count', 0) for agent in self.agents_data]

            axes[0].scatter(stimuli_data, actions_data, alpha=0.6, color='purple')
            axes[0].set_xlabel('Stimuli Count')
            axes[0].set_ylabel('Actions Count')
            axes[0].set_title('Activity Patterns')

            # A line fit needs at least two distinct x values; with a constant
            # stimuli count the fit is degenerate, so the trend line is skipped.
            if len(set(stimuli_data)) > 1:
                slope, intercept = np.polyfit(stimuli_data, actions_data, 1)
                x_span = [min(stimuli_data), max(stimuli_data)]
                axes[0].plot(x_span, [slope * x + intercept for x in x_span], "r--", alpha=0.8)

        # Goal patterns
        if 'goal_patterns' in behavioral and behavioral['goal_patterns']['common_goals']:
            goals, counts = zip(*behavioral['goal_patterns']['common_goals'][:8])
            # Goals are not guaranteed to be strings, so convert before
            # measuring and truncating the label.
            labels = [str(goal) for goal in goals]
            axes[1].barh(range(len(goals)), counts, color='orange')
            axes[1].set_yticks(range(len(goals)))
            axes[1].set_yticklabels([label[:30] + '...' if len(label) > 30 else label for label in labels])
            axes[1].set_xlabel('Frequency')
            axes[1].set_title('Common Goals')

        plt.tight_layout()
        plt.show()

    def _plot_correlation_heatmap(self) -> None:
        """
        Draw an annotated heatmap of the stored correlation matrix.

        Reads analysis_results['correlations']['correlation_matrix'] (nested
        dicts) and rebuilds a DataFrame from it for plotting.
        """
        matrix = pd.DataFrame(self.analysis_results['correlations']['correlation_matrix'])

        plt.figure(figsize=(8, 6))
        sns.heatmap(
            matrix,
            annot=True,
            cmap='coolwarm',
            center=0,  # keep zero correlation at the neutral colour
            square=True,
            cbar_kws={'label': 'Correlation Coefficient'},
        )
        plt.title('Attribute Correlations Heatmap', fontsize=14, fontweight='bold')
        plt.tight_layout()
        plt.show()

    def _compute_attributes_distributions(self, agents: list) -> dict:
        """
        Computes the distributions of the attributes for the agents.
        """
        distributions = {}
        for attribute in self.attributes:
            distributions[attribute] = self._compute_attribute_distribution(agents, attribute)
        
        return distributions
    
    def _compute_attribute_distribution(self, agents: list, attribute: str) -> pd.DataFrame:
        """
        Computes the distribution of a given attribute with support for nested attributes.
        """
        values = []
        
        for agent in agents:
            value = self._get_nested_attribute(agent, attribute)
            values.append(value)
        
        # Handle None values
        values = [v for v in values if v is not None]
        
        if not values:
            return pd.DataFrame()
        
        # Convert mixed types to string for consistent sorting
        try:
            value_counts = pd.Series(values).value_counts().sort_index()
        except TypeError:
            # Handle mixed data types by converting to strings
            string_values = [str(v) for v in values]
            value_counts = pd.Series(string_values).value_counts().sort_index()
        
        return value_counts

    def _get_nested_attribute(self, agent: dict, attribute: str) -> Any:
        """Get nested attribute using dot notation (e.g., 'occupation.title')."""
        keys = attribute.split('.')
        value = agent
        
        for key in keys:
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                return None
        
        return value

    # Utility methods for advanced analysis
    def _test_normality(self, data: List[float]) -> bool:
        """Simple normality test using skewness."""
        if len(data) < 3:
            return False
        
        skewness = pd.Series(data).skew()
        return abs(skewness) < 0.3  # Stringent normality test - threshold to catch bimodal distributions

    def _calculate_diversity_index(self, counts: Counter) -> float:
        """Calculate Shannon diversity index."""
        total = sum(counts.values())
        if total <= 1:
            return 0.0
        
        diversity = 0
        for count in counts.values():
            if count > 0:
                p = count / total
                diversity -= p * np.log(p)
        
        return diversity / np.log(len(counts)) if len(counts) > 1 else 0

    def _categorize_connectivity(self, connections: List[int]) -> Dict[str, int]:
        """Categorize agents by their connectivity level."""
        categories = {'isolated': 0, 'low': 0, 'medium': 0, 'high': 0}
        
        for conn in connections:
            if conn == 0:
                categories['isolated'] += 1
            elif conn <= 2:
                categories['low'] += 1
            elif conn <= 5:
                categories['medium'] += 1
            else:
                categories['high'] += 1
        
        return categories

    def _identify_dominant_traits(self, traits_df: pd.DataFrame) -> Dict[str, str]:
        """Identify the dominant personality traits in the population."""
        trait_means = traits_df.mean()
        dominant = {}
        
        for trait, mean_value in trait_means.items():
            if mean_value > 0.6:
                dominant[trait] = 'high'
            elif mean_value < 0.4:
                dominant[trait] = 'low'
            else:
                dominant[trait] = 'moderate'
        
        return dominant

    def _generate_summary_statistics(self) -> Dict[str, Any]:
        """Generate comprehensive summary statistics."""
        summary = {
            'total_agents': len(self.agents_data),
            'attributes_analyzed': len(self.attributes),
            'data_completeness': {}
        }
        
        # Calculate data completeness for each attribute - handle empty data
        if len(self.agents_data) > 0:
            for attr in self.attributes:
                non_null_count = sum(1 for agent in self.agents_data 
                                   if self._get_nested_attribute(agent, attr) is not None)
                summary['data_completeness'][attr] = non_null_count / len(self.agents_data)
        else:
            # No agents - set all completeness to 0
            for attr in self.attributes:
                summary['data_completeness'][attr] = 0.0
        
        return summary

    def export_analysis_report(self, filename: str = "agent_population_analysis.txt") -> None:
        """Export a comprehensive text report of the analysis."""
        with open(filename, 'w', encoding="utf-8", errors="replace") as f:
            f.write("AGENT POPULATION ANALYSIS REPORT\n")
            f.write("=" * 50 + "\n\n")
            
    def export_analysis_report(self, filename: str = "agent_population_analysis.txt") -> None:
        """Export a comprehensive text report of the analysis."""
        with open(filename, 'w', encoding="utf-8", errors="replace") as f:
            f.write("AGENT POPULATION ANALYSIS REPORT\n")
            f.write("=" * 50 + "\n\n")
            
            # Summary statistics - always generate from current data
            summary = self._generate_summary_statistics()
            f.write(f"Total Agents Analyzed: {summary['total_agents']}\n")
            f.write(f"Attributes Analyzed: {summary['attributes_analyzed']}\n\n")
            
            f.write("Data Completeness:\n")
            for attr, completeness in summary['data_completeness'].items():
                f.write(f"  {attr}: {completeness:.2%}\n")
            f.write("\n")
            
            # Demographics
            if 'demographics' in self.analysis_results:
                demo = self.analysis_results['demographics']
                f.write("DEMOGRAPHICS\n")
                f.write("-" * 20 + "\n")
                
                if 'age_stats' in demo:
                    age_stats = demo['age_stats']
                    f.write(f"Age Statistics:\n")
                    f.write(f"  Mean: {age_stats['mean']:.1f} years\n")
                    f.write(f"  Median: {age_stats['median']:.1f} years\n")
                    f.write(f"  Range: {age_stats['range'][0]}-{age_stats['range'][1]} years\n\n")
                
                if 'occupation_diversity' in demo:
                    occ_div = demo['occupation_diversity']
                    f.write(f"Occupation Diversity:\n")
                    f.write(f"  Unique Occupations: {occ_div['unique_count']}\n")
                    f.write(f"  Diversity Index: {occ_div['diversity_index']:.3f}\n\n")
            
            # Behavioral patterns
            if 'behavioral_patterns' in self.analysis_results:
                behavioral = self.analysis_results['behavioral_patterns']
                f.write("BEHAVIORAL PATTERNS\n")
                f.write("-" * 20 + "\n")
                
                if 'activity_levels' in behavioral:
                    activity = behavioral['activity_levels']
                    f.write(f"Activity Levels:\n")
                    f.write(f"  Average Actions: {activity['actions_mean']:.1f}\n")
                    f.write(f"  Average Stimuli: {activity['stimuli_mean']:.1f}\n")
                    f.write(f"  Activity Ratio: {activity['activity_ratio']:.2f}\n\n")
        
        print(f"Analysis report exported to {filename}")

    def add_custom_analysis(self, name: str, analysis_func: Callable[[List[Dict]], Any]) -> None:
        """
        Add a custom analysis function that will be executed during profiling.
        
        Args:
            name: Name for the custom analysis
            analysis_func: Function that takes agent data and returns analysis results
        """
        if not hasattr(self, '_custom_analyses'):
            self._custom_analyses = {}
        
        self._custom_analyses[name] = analysis_func

    def compare_populations(self, other_agents: Union[List[dict], List[TinyPerson]],
                          attributes: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Compare this population with another population.

        Only attributes for which both populations have a computed distribution
        are compared, so profile() should have been run on this profiler with
        the attributes of interest beforehand.

        Args:
            other_agents: Another set of agents to compare with
            attributes: Specific attributes to compare (uses self.attributes if None)

        Returns:
            A dict with 'population_sizes' ('current' and 'comparison') and
            'attribute_comparisons', which maps each compared attribute to the
            number of unique values and the first three distribution entries of
            both populations.
        """
        if attributes is None:
            attributes = self.attributes

        # Temporary profiler for the other population. Only its distributions
        # are read below, so plotting and the advanced analysis are skipped.
        other_profiler = Profiler(attributes)
        other_profiler.profile(other_agents, plot=False, advanced_analysis=False)

        comparison = {
            'population_sizes': {
                # agents_data is None when profile() has not been run yet;
                # report that as an empty population instead of failing.
                'current': len(self.agents_data or []),
                'comparison': len(other_profiler.agents_data)
            },
            'attribute_comparisons': {}
        }

        # Compare distributions for each attribute
        for attr in attributes:
            if (attr in self.attributes_distributions and
                    attr in other_profiler.attributes_distributions):

                current_dist = self.attributes_distributions[attr]
                other_dist = other_profiler.attributes_distributions[attr]

                # Statistical comparison (simplified)
                comparison['attribute_comparisons'][attr] = {
                    'current_unique_values': len(current_dist),
                    'comparison_unique_values': len(other_dist),
                    'current_top_3': current_dist.head(3).to_dict(),
                    'comparison_top_3': other_dist.head(3).to_dict()
                }

        return comparison