import gradio as gr
import pandas as pd

import cluster_news
import extract_news
import summarizer
import analyze_sentiment
import gather_news
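
# Pipeline overview: gather_news fetches article metadata, extract_news pulls
# full text from each URL, summarizer condenses it, analyze_sentiment labels
# each summary, and cluster_news groups related articles for display.
# Note (inferred from the isinstance checks below, not from the modules'
# docs): the gather_news fetchers return a list of article dicts on success
# and an error-message string on failure.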

# ------------------ Utilities ------------------

def fetch_content(topic):
    # Try NewsAPI first; an error-message string signals failure, so fall
    # back to Google News, and give up if that fails as well.
    articles = gather_news.fetch_articles_newsapi(topic)
    if isinstance(articles, str):
        articles = gather_news.fetch_articles_google(topic)
    if isinstance(articles, str):
        return None
    try:
        # Keep only the ten most recent articles.
        articles = sorted(articles, key=lambda x: x.get("publishedAt", ""), reverse=True)[:10]
    except Exception:
        return None
    return articles
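
# Shape of each article dict as consumed downstream (illustrative values;
# the exact fields depend on gather_news):
#   {"title": "...", "url": "https://...", "source": "...",
#    "author": "...", "publishedAt": "2024-01-01T00:00:00Z"}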

def fetch_and_process_latest_news(sentiment_filters):
    topic = "Top Headlines"
    articles = gather_news.fetch_articles_newsapi("top headlines")
    if isinstance(articles, str) or not articles:
        return sentiment_filters, "### No latest news available", "", "", "", "", None
    articles = sorted(articles, key=lambda x: x.get("publishedAt", ""), reverse=True)[:10]
    extracted_articles = extract_summarize_and_analyze_articles(articles)
    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None
    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(df, content_column="content", summary_column="summary")
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic)
    # Return shape mirrors the click handler's outputs: filter state,
    # five Markdown columns, and the CSV download.
    return sentiment_filters, *cluster_md_blocks, csv_file

def extract_summarize_and_analyze_articles(articles):
    # Articles without a URL, or whose extraction fails, are skipped
    # silently, so the result may hold fewer records than the input.
    extracted_articles = []
    for article in articles:
        url = article.get("url")
        if url:
            content, _ = extract_news.extract_full_content(url)
            if content:
                summary = summarizer.generate_summary(content)
                sentiment, score = analyze_sentiment.analyze_summary(summary)
                extracted_articles.append({
                    "title": article.get("title", "No title"),
                    "url": url,
                    "source": article.get("source", "Unknown"),
                    "author": article.get("author", "Unknown"),
                    "publishedAt": article.get("publishedAt", "Unknown"),
                    "content": content,
                    "summary": summary,
                    "sentiment": sentiment,
                    "score": score,
                })
    return extracted_articles
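
# Quick manual check of this stage (hypothetical topic; run in a REPL):
#   articles = fetch_content("climate change")
#   records = extract_summarize_and_analyze_articles(articles or [])
#   print(len(records), records[0]["sentiment"] if records else "no records")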

def extract_summarize_and_analyze_content_from_file(files):
    extracted_articles = []
    for file in files:
        # Gradio hands over temp-file objects whose .name holds the path on
        # disk (newer Gradio versions may pass plain path strings instead).
        with open(file.name, "r", encoding="utf-8") as f:
            content = f.read()
        if content.strip():
            summary = summarizer.generate_summary(content)
            sentiment, score = analyze_sentiment.analyze_summary(summary)
            extracted_articles.append({
                "title": "Custom File",
                "url": "N/A",
                "source": "Uploaded File",
                "author": "Unknown",
                "publishedAt": "Unknown",
                "content": content,
                "summary": summary,
                "sentiment": sentiment,
                "score": score,
            })
    return extracted_articles

def extract_summarize_and_analyze_content_from_urls(urls):
    extracted_articles = []
    for url in urls:
        content, title = extract_news.extract_full_content(url)
        if content:  # Only proceed if content is successfully extracted
            summary = summarizer.generate_summary(content)
            sentiment, score = analyze_sentiment.analyze_summary(summary)
            extracted_articles.append({
                "title": title or "Untitled Article",
                "url": url,
                "source": "External Link",
                "author": "Unknown",
                "publishedAt": "Unknown",
                "content": content,
                "summary": summary,
                "sentiment": sentiment,
                "score": score,
            })
    return extracted_articles

def display_clusters_as_columns(result, sentiment_filters=None):
    df = result["dataframe"]
    detected_topics = result.get("detected_topics", {})
    # Normalize labels ("positive" -> "Positive") so they match the
    # checkbox values used for filtering.
    df["sentiment"] = df["sentiment"].str.capitalize()
    if sentiment_filters:
        df = df[df["sentiment"].isin(sentiment_filters)]
    if df.empty:
        return ["### ⚠️ No matching articles."] + [""] * 4
    clusters = df.groupby("cluster_label")
    markdown_blocks = []
    for cluster_label, articles in clusters:
        cluster_md = f"### 🧩 Cluster {cluster_label}\n"
        if cluster_label in detected_topics:
            topics = detected_topics[cluster_label]
            cluster_md += f"**Primary Topic:** {topics['primary_focus']}\n\n"
            if topics["related_topics"]:
                cluster_md += f"**Related Topics:** {', '.join(topics['related_topics'])}\n\n"
        cluster_md += f"**Articles:** {len(articles)}\n\n"
        for _, article in articles.iterrows():
            cluster_md += (
                f"#### 📰 {article['title']}\n"
                f"- **Source:** {article['source']}\n"
                f"- **Sentiment:** {article['sentiment']}\n"
                f"<details><summary><strong>Summary</strong></summary>\n"
                f"{article['summary']}\n"
                f"</details>\n"
                f"- [Read Full Article]({article['url']})\n\n"
            )
        markdown_blocks.append(cluster_md)
    # Pad (or trim) to exactly five blocks, one per Markdown column.
    while len(markdown_blocks) < 5:
        markdown_blocks.append("")
    return markdown_blocks[:5]
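
# Only the first five clusters can be shown (the UI below has five fixed
# columns); any additional clusters from cluster_news are dropped by the
# [:5] slice above.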

def save_clustered_articles(df, topic):
    if df.empty:
        return None, None
    # Written to the current working directory; the second return value is
    # a placeholder kept for the callers' (csv_file, _) unpacking.
    csv_file = f"{topic.replace(' ', '_')}_clustered_articles.csv"
    df.to_csv(csv_file, index=False)
    return csv_file, None
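
# On hosted platforms where the working directory may not persist, a safer
# variant (a sketch, not in the original) would write to a temp directory:
#   import os, tempfile
#   csv_file = os.path.join(tempfile.gettempdir(),
#                           f"{topic.replace(' ', '_')}_clustered_articles.csv")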

# ------------------ Pipeline Trigger ------------------

def update_ui_with_columns(topic, files, urls, sentiment_filters):
    # Merge articles from all three input paths: topic search, uploaded
    # .txt files, and pasted URLs.
    extracted_articles = []
    if topic.strip():
        articles = fetch_content(topic)
        if articles:
            extracted_articles.extend(extract_summarize_and_analyze_articles(articles))
    if files:
        extracted_articles.extend(extract_summarize_and_analyze_content_from_file(files))
    if urls:
        url_list = [url.strip() for url in urls.split("\n") if url.strip()]
        extracted_articles.extend(extract_summarize_and_analyze_content_from_urls(url_list))
    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None
    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(df, content_column="content", summary_column="summary")
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic or "batch_upload")
    return sentiment_filters, *cluster_md_blocks, csv_file


def clear_interface():
    # Values must line up one-to-one with the outputs list wired to
    # clear_button.click below.
    return (
        "",                                   # topic_input
        ["Positive", "Neutral", "Negative"],  # sentiment_filter
        gr.update(value=None),                # uploaded_files (reset file upload)
        "",                                   # urls_input
        "", "", "", "", "",                   # cluster columns 0–4
        gr.update(value=None),                # csv_output (reset download file)
    )

# ------------------ Gradio UI ------------------

with gr.Blocks(theme=gr.themes.Base(), css=".gr-markdown { margin: 10px; }") as demo:
    # Header Section
    gr.Markdown("# 📰 Quick Pulse")
    gr.Markdown("### AI-Powered News Summarization with Real-Time Sentiment and Topic Insights")
    gr.Markdown(
        "From headlines to insight, Quick Pulse summarizes news stories, captures emotional "
        "context, and clusters related topics to provide structured intelligence, faster than ever."
    )

    # Input Section
    gr.Markdown("---")  # Horizontal line for separation
    with gr.Accordion("🗞️ Latest Top Headlines", open=False):
        latest_news_button = gr.Button("Fetch & Summarize Top 10 Headlines")
    with gr.Row():
        topic_input = gr.Textbox(label="Enter Topic", placeholder="e.g. climate change")
        sentiment_filter = gr.CheckboxGroup(
            choices=["Positive", "Neutral", "Negative"],
            value=["Positive", "Neutral", "Negative"],
            label="Sentiment Filter",
        )
        csv_output = gr.File(label="📁 Download Clustered Digest CSV")
    with gr.Accordion("📂 Upload Articles (.txt files)", open=False):
        uploaded_files = gr.File(label="Upload .txt Files", file_types=[".txt"], file_count="multiple")
    with gr.Accordion("🔗 Enter Multiple URLs", open=False):
        urls_input = gr.Textbox(label="Enter URLs (newline separated)", lines=4)
    with gr.Row():
        submit_button = gr.Button("Generate Digest")
        clear_button = gr.Button("Clear")
    with gr.Row():
        # Five fixed columns, one per cluster block.
        column_0 = gr.Markdown()
        column_1 = gr.Markdown()
        column_2 = gr.Markdown()
        column_3 = gr.Markdown()
        column_4 = gr.Markdown()

    submit_button.click(
        fn=update_ui_with_columns,
        inputs=[topic_input, uploaded_files, urls_input, sentiment_filter],
        outputs=[
            sentiment_filter,
            column_0, column_1, column_2, column_3, column_4,
            csv_output,
        ],
    )
    latest_news_button.click(
        fn=fetch_and_process_latest_news,
        inputs=[sentiment_filter],
        outputs=[
            sentiment_filter,
            column_0, column_1, column_2, column_3, column_4,
            csv_output,
        ],
    )
    clear_button.click(
        fn=clear_interface,
        inputs=[],
        outputs=[
            topic_input,       # 1
            sentiment_filter,  # 2
            uploaded_files,    # 3
            urls_input,        # 4
            column_0,          # 5
            column_1,          # 6
            column_2,          # 7
            column_3,          # 8
            column_4,          # 9
            csv_output,        # 10
        ],
    )

if __name__ == "__main__":
    demo.launch()
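
# For local debugging (not needed on Hugging Face Spaces, which calls
# launch() without arguments), launch() also accepts a bind address and
# port, e.g.:
#   demo.launch(server_name="0.0.0.0", server_port=7860)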