milwright committed on
Commit
811a5c6
·
1 Parent(s): 3f17080

fix temporal charts: filter out empty time periods to prevent sparse data gaps

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +15 -6
src/streamlit_app.py CHANGED
@@ -550,13 +550,16 @@ def create_temporal_visualizations(df: pd.DataFrame) -> Dict[str, go.Figure]:
550
  )
551
  figures['heatmap'] = fig_heatmap
552
 
553
- # 2. Time series with rolling average
554
  daily_stats = df.set_index('created_utc').resample('D').agg({
555
  'id': 'count',
556
  'score': 'mean',
557
  'num_comments': 'mean'
558
  }).rename(columns={'id': 'post_count'})
559
-
 
 
 
560
  # Calculate rolling averages
561
  daily_stats['post_count_ma7'] = daily_stats['post_count'].rolling(window=7, min_periods=1).mean()
562
  daily_stats['score_ma7'] = daily_stats['score'].rolling(window=7, min_periods=1).mean()
@@ -623,9 +626,12 @@ def create_temporal_visualizations(df: pd.DataFrame) -> Dict[str, go.Figure]:
623
  'score': ['mean', 'sum'],
624
  'num_comments': ['mean', 'sum']
625
  })
626
-
 
 
 
627
  fig_monthly = go.Figure()
628
-
629
  fig_monthly.add_trace(go.Bar(
630
  x=monthly_data.index,
631
  y=monthly_data[('id', 'count')],
@@ -659,9 +665,12 @@ def create_temporal_visualizations(df: pd.DataFrame) -> Dict[str, go.Figure]:
659
  'score': ['mean', 'sum'],
660
  'num_comments': ['mean', 'sum']
661
  })
662
-
 
 
 
663
  fig_weekly = go.Figure()
664
-
665
  fig_weekly.add_trace(go.Bar(
666
  x=weekly_data.index,
667
  y=weekly_data[('id', 'count')],
 
550
  )
551
  figures['heatmap'] = fig_heatmap
552
 
553
+ # 2. Time series with rolling average - only include days with actual data
554
  daily_stats = df.set_index('created_utc').resample('D').agg({
555
  'id': 'count',
556
  'score': 'mean',
557
  'num_comments': 'mean'
558
  }).rename(columns={'id': 'post_count'})
559
+
560
+ # Filter out days with no posts to prevent misleading gaps
561
+ daily_stats = daily_stats[daily_stats['post_count'] > 0]
562
+
563
  # Calculate rolling averages
564
  daily_stats['post_count_ma7'] = daily_stats['post_count'].rolling(window=7, min_periods=1).mean()
565
  daily_stats['score_ma7'] = daily_stats['score'].rolling(window=7, min_periods=1).mean()
 
626
  'score': ['mean', 'sum'],
627
  'num_comments': ['mean', 'sum']
628
  })
629
+
630
+ # Filter out months with no posts
631
+ monthly_data = monthly_data[monthly_data[('id', 'count')] > 0]
632
+
633
  fig_monthly = go.Figure()
634
+
635
  fig_monthly.add_trace(go.Bar(
636
  x=monthly_data.index,
637
  y=monthly_data[('id', 'count')],
 
665
  'score': ['mean', 'sum'],
666
  'num_comments': ['mean', 'sum']
667
  })
668
+
669
+ # Filter out weeks with no posts
670
+ weekly_data = weekly_data[weekly_data[('id', 'count')] > 0]
671
+
672
  fig_weekly = go.Figure()
673
+
674
  fig_weekly.add_trace(go.Bar(
675
  x=weekly_data.index,
676
  y=weekly_data[('id', 'count')],