# This script provides a Gradio interface for gathering, clustering,
# summarizing, and analyzing news articles, with sentiment analysis and
# topic modeling.
import html
import re

import gradio as gr
import pandas as pd
import plotly.express as px

import analyze_sentiment
import cluster_news
import extract_news
import gather_news
import summarizer
def plot_topic_frequency(result):
    """Build a bar chart of article counts per cluster label.

    Args:
        result: Mapping whose ``"dataframe"`` entry is a DataFrame with a
            ``cluster_label`` column.

    Returns:
        The figure created by ``px.bar``, 350 px high with the legend hidden.
    """
    counts = result["dataframe"]["cluster_label"].value_counts().reset_index()
    counts.columns = ["Topic", "Count"]
    # Bars are coloured by topic; the legend itself is switched off.
    return px.bar(
        counts,
        x="Topic",
        y="Count",
        title="Topic Frequency",
        color="Topic",
    ).update_layout(showlegend=False, height=350)
def plot_sentiment_trends(result):
    """Build a pie chart of how many articles carry each sentiment value.

    Args:
        result: Mapping whose ``"dataframe"`` entry is a DataFrame with a
            ``sentiment`` column.

    Returns:
        The figure created by ``px.pie``, 350 px high, with each slice
        labelled by name and percentage.
    """
    counts = result["dataframe"]["sentiment"].value_counts().reset_index()
    counts.columns = ["Sentiment", "Count"]
    pie = px.pie(
        counts,
        names="Sentiment",
        values="Count",
        title="Sentiment Distribution",
    )
    return pie.update_traces(textinfo='label+percent').update_layout(height=350)
def render_top_clusters_table(result, top_n=5):
    """Return the most populated clusters as a two-column table.

    Args:
        result: Mapping whose ``"dataframe"`` entry is a DataFrame with a
            ``cluster_label`` column.
        top_n: Number of leading rows to keep.

    Returns:
        A DataFrame with columns ``Cluster`` and ``Articles``, in the order
        produced by ``value_counts`` and cut to the first ``top_n`` rows.
    """
    table = result["dataframe"]["cluster_label"].value_counts().reset_index()
    table.columns = ["Cluster", "Articles"]
    return table.head(top_n)
def fetch_and_process_latest_news(sentiment_filters):
    """Fetch top headlines and run them through the digest pipeline.

    Args:
        sentiment_filters: Sentiment labels selected in the UI; passed
            through unchanged.

    Returns:
        Whatever ``process_and_display_articles`` returns for the fetched
        headlines under the label ``"Top Headlines"``.
    """
    return process_and_display_articles(
        gather_news.fetch_newsapi_top_headlines(),
        sentiment_filters,
        "Top Headlines",
    )
def fetch_and_process_topic_news(topic, sentiment_filters):
    """Fetch articles for ``topic`` and run them through the digest pipeline.

    Args:
        topic: Search term handed to ``gather_news.fetch_newsapi_everything``.
        sentiment_filters: Sentiment labels selected in the UI; passed
            through unchanged.

    Returns:
        Whatever ``process_and_display_articles`` returns. The label is
        ``topic`` itself, or ``"Topic"`` when ``topic`` is falsy.
    """
    fetched = gather_news.fetch_newsapi_everything(topic)
    label = topic if topic else "Topic"
    return process_and_display_articles(fetched, sentiment_filters, label)
def process_and_display_articles(articles, sentiment_filters, topic_label):
    """Run fetched articles through the full digest pipeline.

    The articles are sorted newest-first, summarized and sentiment-scored,
    de-duplicated, clustered, rendered to HTML blocks, saved to CSV and
    charted.

    Args:
        articles: Iterable of article dicts (may be empty or ``None``).
        sentiment_filters: Sentiment labels selected in the UI; echoed back
            as the first output and used to filter the rendered blocks.
        topic_label: Label used to name the CSV file.

    Returns:
        An 11-item tuple: the sentiment filters, five cluster HTML blocks,
        the CSV path, the topic figure, the sentiment figure, the top
        clusters table, and a ``gr.update`` toggling the digest section.
        When nothing survives extraction or de-duplication, the blocks are
        empty strings, the file/figures/table are ``None``, and the digest
        section is hidden.
    """

    def empty_outputs():
        # Same shape as the success tuple, with every result slot blanked.
        return (
            sentiment_filters,
            "", "", "", "", "",
            None, None, None,
            gr.update(visible=False),
        )

    if not articles:
        return empty_outputs()

    # `or ""` also covers a key that is present but null; a plain
    # .get(key, "") would return None there and break the string sort.
    articles = sorted(
        articles,
        key=lambda item: item.get("publishedAt") or "",
        reverse=True,
    )
    extracted_articles = extract_summarize_and_analyze_articles(articles)
    deduped_articles = deduplicate_articles(extracted_articles)
    if not deduped_articles:
        return empty_outputs()

    df = pd.DataFrame(deduped_articles)
    result = cluster_news.cluster_and_label_articles(
        df, content_column="content", summary_column="summary"
    )
    cluster_md_blocks = display_clusters_as_columns_grouped_by_sentiment(
        result, sentiment_filters
    )
    csv_file, _ = save_clustered_articles(result["dataframe"], topic_label)

    # Analytics
    topic_fig = plot_topic_frequency(result)
    sentiment_fig = plot_sentiment_trends(result)
    top_clusters_table = render_top_clusters_table(result)
    return (
        sentiment_filters,
        *cluster_md_blocks,
        csv_file,
        topic_fig,
        sentiment_fig,
        top_clusters_table,
        gr.update(visible=True),
    )
def extract_summarize_and_analyze_articles(articles):
    """Summarize and sentiment-score every article that has body text.

    Args:
        articles: Iterable of article dicts. The body is read from
            ``"text"``, falling back to ``"content"``; articles with
            neither are skipped.

    Returns:
        A list of dicts with keys ``title``, ``url``, ``source``,
        ``author``, ``publishedAt``, ``content``, ``summary``,
        ``sentiment`` and ``score``. Missing *or null* title, source,
        author and publishedAt fall back to their placeholder strings.
    """
    records = []
    for article in articles:
        content = article.get("text") or article.get("content")
        if not content:
            continue

        source = article.get("source")
        # NOTE(review): NewsAPI-style payloads carry the source as
        # {"id": ..., "name": ...}. Flatten it defensively; whether
        # gather_news already does so is not visible from this file.
        if isinstance(source, dict):
            source = source.get("name")

        summary = summarizer.generate_summary(content)
        sentiment, score = analyze_sentiment.analyze_summary(summary)
        records.append({
            # `or` rather than a .get default: the default is ignored when
            # the key exists with a None value.
            "title": article.get("title") or "No title",
            "url": article.get("url"),
            "source": source or "Unknown",
            "author": article.get("author") or "Unknown",
            "publishedAt": article.get("publishedAt") or "Unknown",
            "content": content,
            "summary": summary,
            "sentiment": sentiment,
            "score": score,
        })
    return records
def deduplicate_articles(articles):
    """Drop repeated articles, keeping the first occurrence of each.

    An article is a duplicate when any of these matches an article already
    kept: its URL (if truthy), its (title, source) pair, or its
    (title, summary) pair. Title, source and summary are compared after
    trimming and lower-casing.

    Args:
        articles: Iterable of article dicts.

    Returns:
        A list of the surviving article dicts, in their original order.
    """

    def normalize(value):
        # Null or non-string fields (e.g. a dict-shaped source) must not
        # crash the comparison, so coerce before trimming and lower-casing.
        if value is None:
            return ""
        return str(value).strip().lower()

    seen_urls = set()
    seen_title_source = set()
    seen_title_summary = set()
    deduped = []
    for art in articles:
        url = art.get("url")
        title = normalize(art.get("title"))
        key_title_source = (title, normalize(art.get("source")))
        key_title_summary = (title, normalize(art.get("summary")))

        if (
            (url and url in seen_urls)
            or key_title_source in seen_title_source
            or key_title_summary in seen_title_summary
        ):
            continue

        deduped.append(art)
        if url:
            seen_urls.add(url)
        seen_title_source.add(key_title_source)
        seen_title_summary.add(key_title_summary)
    return deduped
def extract_summarize_and_analyze_content_from_urls(urls):
    """Extract articles from ``urls`` and summarize/score each one.

    Args:
        urls: URLs handed to ``extract_news.extract_news_articles``.

    Returns:
        The result of ``extract_summarize_and_analyze_articles`` applied to
        the extracted articles.
    """
    return extract_summarize_and_analyze_articles(
        extract_news.extract_news_articles(urls)
    )
def display_clusters_as_columns_grouped_by_sentiment(result, sentiment_filters=None):
    """Render clustered articles as HTML cards grouped by sentiment.

    Each cluster becomes one HTML block containing its themes/topics and
    its articles split into Positive / Neutral / Negative sections.
    All article- and cluster-derived text is HTML-escaped before being
    interpolated, because it originates from external news sources.

    Args:
        result: Mapping with a ``"dataframe"`` entry (columns
            ``cluster_label``, ``sentiment``, ``title``, ``source``,
            ``summary``, ``url`` and optionally ``lda_topics``) and optional
            ``"cluster_primary_topics"`` / ``"cluster_related_topics"``
            mappings keyed by cluster label.
        sentiment_filters: Optional collection of capitalized sentiment
            labels to keep; falsy keeps everything.

    Returns:
        Exactly five strings: one HTML block per cluster (at most the
        first five), padded with empty strings. If no article matches,
        the first entry is a "No matching articles" notice.

    Side effects:
        Capitalizes the ``sentiment`` column of ``result["dataframe"]``
        in place.
    """

    def esc(value):
        # quote=True so values are also safe inside single-quoted attributes.
        return html.escape(str(value), quote=True)

    # (background, border) colours per sentiment section.
    palette = {
        "Positive": ("#e8f5e9", "#43a047"),
        "Neutral": ("#e3f2fd", "#1976d2"),
        "Negative": ("#ffebee", "#c62828"),
    }

    df = result["dataframe"]
    cluster_primary_topics = result.get("cluster_primary_topics", {})
    cluster_related_topics = result.get("cluster_related_topics", {})

    # Written back to the caller's frame on purpose (original behaviour):
    # later consumers of the same frame see the capitalized labels.
    df["sentiment"] = df["sentiment"].str.capitalize()
    if sentiment_filters:
        df = df[df["sentiment"].isin(sentiment_filters)]
    if df.empty:
        return ["### ⚠️ No matching articles."] + [""] * 4

    markdown_blocks = []
    for cluster_label, articles in df.groupby("cluster_label"):
        parts = [
            "<div style='border:2px solid #e0e0e0; border-radius:10px; margin-bottom:18px; padding:18px; background: #f9f9fa;'>",
            f"<h3 style='color:#2d6cdf;'>🧩 Cluster: {esc(cluster_label)}</h3>",
        ]

        lda_topics = articles["lda_topics"].iloc[0] if "lda_topics" in articles else ""
        if lda_topics:
            parts.append(
                f"<b style='color:#0d47a1;'>Main Themes:</b> <span style='color:#1976d2'>{esc(lda_topics)}</span><br>"
            )
        primary = cluster_primary_topics.get(cluster_label, [])
        if primary:
            parts.append(
                f"<b style='color:#1b5e20;'>Primary Topics:</b> <span style='color:#388e3c'>{', '.join(esc(t) for t in primary)}</span><br>"
            )
        related = cluster_related_topics.get(cluster_label, [])
        if related:
            parts.append(
                f"<b style='color:#616161;'>Related Topics:</b> <span style='color:#757575'>{', '.join(esc(t) for t in related)}</span><br>"
            )
        parts.append(f"<b>Articles:</b> {len(articles)}<br><br>")

        for sentiment, (color, border) in palette.items():
            sentiment_articles = articles[articles["sentiment"] == sentiment]
            if sentiment_articles.empty:
                continue
            parts.append(
                f"<div style='background:{color}; border-left:6px solid {border}; border-radius:6px; margin-bottom:10px; padding:10px;'>"
                f"<span style='font-size:1.2em;'><b>{sentiment} News ({len(sentiment_articles)})</b></span><br>"
            )
            for _, article in sentiment_articles.iterrows():
                parts.append(
                    f"<div style='margin:10px 0 10px 0; padding:10px; border-bottom:1px solid #e0e0e0;'>"
                    f"<span style='font-weight:bold; color:#37474f;'>📰 {esc(article['title'])}</span><br>"
                    f"<span style='font-size:0.95em;'>"
                    f"<b>Source:</b> {esc(article['source'])}<br>"
                    f"<details><summary style='cursor:pointer; color:#1976d2;'><strong>Summary</strong></summary>"
                    f"<div style='margin-left:10px; color:#424242;'>{esc(article['summary'])}</div></details>"
                    f"<a href='{esc(article['url'])}' target='_blank' style='color:#1976d2;'>Read Full Article</a>"
                    f"</span></div>"
                )
            parts.append("</div>")

        parts.append("</div>")
        markdown_blocks.append("".join(parts))

    # The UI has exactly five output columns: pad or truncate to fit.
    markdown_blocks += [""] * (5 - len(markdown_blocks))
    return markdown_blocks[:5]
def save_clustered_articles(df, topic):
    """Write the clustered articles to a CSV file in the working directory.

    The file is named ``<topic>_clustered_articles.csv``. Because ``topic``
    can come straight from user input, every character other than word
    characters, ``.`` and ``-`` is replaced by ``_``, so the name cannot
    contain path separators.

    Args:
        df: DataFrame to save.
        topic: Label used as the filename stem.

    Returns:
        ``(csv_path, None)`` on success, or ``(None, None)`` when ``df`` is
        empty (nothing is written).
    """
    if df.empty:
        return None, None
    # Per-character replacement keeps names for ordinary topics unchanged
    # ("climate change" -> "climate_change") while neutralising "/", "\\" etc.
    safe_topic = re.sub(r"[^\w.\-]", "_", topic)
    csv_file = f"{safe_topic}_clustered_articles.csv"
    df.to_csv(csv_file, index=False)
    return csv_file, None
def update_ui_with_columns(topic, urls, sentiment_filters):
    """Handle the "Generate Digest" button.

    A non-blank ``topic`` takes priority and is delegated to
    ``fetch_and_process_topic_news``. Otherwise the newline-separated
    ``urls`` are extracted, summarized, de-duplicated, clustered, rendered,
    saved and charted.

    Args:
        topic: Topic text box value (may be empty or ``None``).
        urls: Newline-separated URLs (may be empty or ``None``).
        sentiment_filters: Sentiment labels selected in the UI.

    Returns:
        An 11-item tuple: the sentiment filters, five cluster HTML blocks,
        the CSV path, the topic figure, the sentiment figure, the top
        clusters table, and a ``gr.update`` toggling the digest section.
        When no article could be extracted, the blocks are empty strings,
        the file/figures/table are ``None``, and the section is hidden.
    """
    if topic and topic.strip():
        return fetch_and_process_topic_news(topic, sentiment_filters)

    url_list = [line.strip() for line in (urls or "").split("\n") if line.strip()]
    # Skip the extractor entirely when there is nothing to extract.
    extracted_articles = (
        extract_summarize_and_analyze_content_from_urls(url_list) if url_list else []
    )
    if not extracted_articles:
        return (
            sentiment_filters,
            "", "", "", "", "",
            None, None, None,
            gr.update(visible=False),
        )

    deduped_articles = deduplicate_articles(extracted_articles)
    df = pd.DataFrame(deduped_articles)
    result = cluster_news.cluster_and_label_articles(
        df, content_column="content", summary_column="summary"
    )
    cluster_md_blocks = display_clusters_as_columns_grouped_by_sentiment(
        result, sentiment_filters
    )
    # The topic is blank on this path, so always use the fixed label; the
    # old `topic or "batch_upload"` let a whitespace-only topic through.
    csv_file, _ = save_clustered_articles(result["dataframe"], "batch_upload")
    topic_fig = plot_topic_frequency(result)
    sentiment_fig = plot_sentiment_trends(result)
    top_clusters_table = render_top_clusters_table(result)
    return (
        sentiment_filters,
        *cluster_md_blocks,
        csv_file,
        topic_fig,
        sentiment_fig,
        top_clusters_table,
        gr.update(visible=True),
    )
def clear_interface():
    """Return the values that reset every component wired to "Clear".

    Returns:
        A 13-item tuple in the order of the clear button's outputs: empty
        topic, all three sentiments selected, empty URL box, five empty
        cluster columns, a cleared CSV download, ``None`` for both figures
        and the table, and an update hiding the digest section.
    """
    all_sentiments = ["Positive", "Neutral", "Negative"]
    blank_columns = ("",) * 5  # cluster columns 0-4
    return (
        "",                        # topic_input
        all_sentiments,            # sentiment_filter
        "",                        # urls_input
        *blank_columns,
        gr.update(value=None),     # csv_output (reset download file)
        None, None, None,          # topic_fig, sentiment_fig, top_clusters_table
        gr.update(visible=False),  # hide the Clustered News Digest section
    )
# Gradio UI: inputs and analytics on top, clustered digest below.
# NOTE(review): widget nesting was reconstructed from the component order;
# confirm the Row/Column layout against the running app.
with gr.Blocks(theme=gr.themes.Base(), css="""
.gr-markdown { margin: 10px; }
.analytics-card {background: #f5f7fa; border-radius: 10px; padding: 18px; margin-bottom: 18px;}
""") as demo:
    gr.Markdown(
        "<h1 style='text-align:center;'>📰 Quick Pulse</h1>"
        "<h3 style='text-align:center; color:#1976d2;'>AI-Powered News Summarization with Real-Time Sentiment and Topic Insights</h3>"
        "<p style='text-align:center;'>From headlines to insight, Quick Pulse summarizes news stories, captures emotional context, clusters related topics, and provides analytics at a glance.</p>"
    )

    with gr.Row():
        with gr.Column(scale=2):
            topic_input = gr.Textbox(label="Enter Topic", placeholder="e.g. climate change")
            sentiment_filter = gr.CheckboxGroup(
                choices=["Positive", "Neutral", "Negative"],
                value=["Positive", "Neutral", "Negative"],
                label="Sentiment Filter",
            )
            # NOTE(review): the leading "π" in this label and in the CSV
            # label below looks like a mis-encoded emoji whose original
            # cannot be determined here; left untouched.
            with gr.Accordion("π Enter Multiple URLs", open=False):
                urls_input = gr.Textbox(label="Enter URLs (newline separated)", lines=4)
            with gr.Row():
                submit_button = gr.Button(" Generate Digest", scale=1)
                latest_news_button = gr.Button("Fetch & Summarize Top News", scale=1)
                clear_button = gr.Button(" Clear", scale=1)
            csv_output = gr.File(label="π Download Clustered Digest CSV")
        with gr.Column(scale=3):
            with gr.Row():
                topic_fig = gr.Plot(label="Topic Frequency")
                sentiment_fig = gr.Plot(label="Sentiment Trends")
            top_clusters_table = gr.Dataframe(label="Top Clusters")

    gr.Markdown("---")

    # Hidden until a digest has been generated; handlers toggle visibility.
    clustered_digest_section = gr.Group(visible=False)
    with clustered_digest_section:
        gr.Markdown("<h3 style='color:#1976d2;'>Clustered News Digest</h3>")
        with gr.Row():
            column_0 = gr.Markdown()
            column_1 = gr.Markdown()
            column_2 = gr.Markdown()
            column_3 = gr.Markdown()
            column_4 = gr.Markdown()

    # Components every handler writes to, in the order the handlers return
    # them. Kept in one place so the three click bindings cannot drift apart.
    result_components = [
        column_0, column_1, column_2, column_3, column_4,
        csv_output,
        topic_fig, sentiment_fig, top_clusters_table,
        clustered_digest_section,
    ]
    digest_outputs = [sentiment_filter, *result_components]

    submit_button.click(
        fn=update_ui_with_columns,
        inputs=[topic_input, urls_input, sentiment_filter],
        outputs=digest_outputs,
    )
    latest_news_button.click(
        fn=fetch_and_process_latest_news,
        inputs=[sentiment_filter],
        outputs=digest_outputs,
    )
    clear_button.click(
        fn=clear_interface,
        inputs=[],
        outputs=[topic_input, sentiment_filter, urls_input, *result_components],
    )

if __name__ == "__main__":
    demo.launch()