Spaces:
Sleeping
Sleeping
fix temporal charts: filter out empty time periods to prevent sparse data gaps
Browse files
- src/streamlit_app.py +15 -6
src/streamlit_app.py
CHANGED
|
@@ -550,13 +550,16 @@ def create_temporal_visualizations(df: pd.DataFrame) -> Dict[str, go.Figure]:
|
|
| 550 |
)
|
| 551 |
figures['heatmap'] = fig_heatmap
|
| 552 |
|
| 553 |
-
# 2. Time series with rolling average
|
| 554 |
daily_stats = df.set_index('created_utc').resample('D').agg({
|
| 555 |
'id': 'count',
|
| 556 |
'score': 'mean',
|
| 557 |
'num_comments': 'mean'
|
| 558 |
}).rename(columns={'id': 'post_count'})
|
| 559 |
-
|
|
|
|
|
|
|
|
|
|
| 560 |
# Calculate rolling averages
|
| 561 |
daily_stats['post_count_ma7'] = daily_stats['post_count'].rolling(window=7, min_periods=1).mean()
|
| 562 |
daily_stats['score_ma7'] = daily_stats['score'].rolling(window=7, min_periods=1).mean()
|
|
@@ -623,9 +626,12 @@ def create_temporal_visualizations(df: pd.DataFrame) -> Dict[str, go.Figure]:
|
|
| 623 |
'score': ['mean', 'sum'],
|
| 624 |
'num_comments': ['mean', 'sum']
|
| 625 |
})
|
| 626 |
-
|
|
|
|
|
|
|
|
|
|
| 627 |
fig_monthly = go.Figure()
|
| 628 |
-
|
| 629 |
fig_monthly.add_trace(go.Bar(
|
| 630 |
x=monthly_data.index,
|
| 631 |
y=monthly_data[('id', 'count')],
|
|
@@ -659,9 +665,12 @@ def create_temporal_visualizations(df: pd.DataFrame) -> Dict[str, go.Figure]:
|
|
| 659 |
'score': ['mean', 'sum'],
|
| 660 |
'num_comments': ['mean', 'sum']
|
| 661 |
})
|
| 662 |
-
|
|
|
|
|
|
|
|
|
|
| 663 |
fig_weekly = go.Figure()
|
| 664 |
-
|
| 665 |
fig_weekly.add_trace(go.Bar(
|
| 666 |
x=weekly_data.index,
|
| 667 |
y=weekly_data[('id', 'count')],
|
|
|
|
| 550 |
)
|
| 551 |
figures['heatmap'] = fig_heatmap
|
| 552 |
|
| 553 |
+
# 2. Time series with rolling average - only include days with actual data
|
| 554 |
daily_stats = df.set_index('created_utc').resample('D').agg({
|
| 555 |
'id': 'count',
|
| 556 |
'score': 'mean',
|
| 557 |
'num_comments': 'mean'
|
| 558 |
}).rename(columns={'id': 'post_count'})
|
| 559 |
+
|
| 560 |
+
# Filter out days with no posts to prevent misleading gaps
|
| 561 |
+
daily_stats = daily_stats[daily_stats['post_count'] > 0]
|
| 562 |
+
|
| 563 |
# Calculate rolling averages
|
| 564 |
daily_stats['post_count_ma7'] = daily_stats['post_count'].rolling(window=7, min_periods=1).mean()
|
| 565 |
daily_stats['score_ma7'] = daily_stats['score'].rolling(window=7, min_periods=1).mean()
|
|
|
|
| 626 |
'score': ['mean', 'sum'],
|
| 627 |
'num_comments': ['mean', 'sum']
|
| 628 |
})
|
| 629 |
+
|
| 630 |
+
# Filter out months with no posts
|
| 631 |
+
monthly_data = monthly_data[monthly_data[('id', 'count')] > 0]
|
| 632 |
+
|
| 633 |
fig_monthly = go.Figure()
|
| 634 |
+
|
| 635 |
fig_monthly.add_trace(go.Bar(
|
| 636 |
x=monthly_data.index,
|
| 637 |
y=monthly_data[('id', 'count')],
|
|
|
|
| 665 |
'score': ['mean', 'sum'],
|
| 666 |
'num_comments': ['mean', 'sum']
|
| 667 |
})
|
| 668 |
+
|
| 669 |
+
# Filter out weeks with no posts
|
| 670 |
+
weekly_data = weekly_data[weekly_data[('id', 'count')] > 0]
|
| 671 |
+
|
| 672 |
fig_weekly = go.Figure()
|
| 673 |
+
|
| 674 |
fig_weekly.add_trace(go.Bar(
|
| 675 |
x=weekly_data.index,
|
| 676 |
y=weekly_data[('id', 'count')],
|