import gradio as gr
import pandas as pd

import cluster_news
import extract_news
import summarizer
import analyze_sentiment
import gather_news
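
# Pipeline overview: gather_news fetches article metadata, extract_news pulls
# full text from each URL, summarizer condenses it, analyze_sentiment labels
# each summary, and cluster_news groups related articles for display.
# Note (inferred from the isinstance checks below, not from the modules'
# docs): the gather_news fetchers return a list of article dicts on success
# and an error-message string on failure.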

# ------------------ Utilities ------------------

def fetch_content(topic):
    # Try NewsAPI first; an error-message string signals failure, so fall
    # back to Google News, and give up if that fails as well.
    articles = gather_news.fetch_articles_newsapi(topic)
    if isinstance(articles, str):
        articles = gather_news.fetch_articles_google(topic)
    if isinstance(articles, str):
        return None
    try:
        # Keep only the ten most recent articles.
        articles = sorted(articles, key=lambda x: x.get("publishedAt", ""), reverse=True)[:10]
    except Exception:
        return None
    return articles
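
# Shape of each article dict as consumed downstream (illustrative values;
# the exact fields depend on gather_news):
#   {"title": "...", "url": "https://...", "source": "...",
#    "author": "...", "publishedAt": "2024-01-01T00:00:00Z"}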

def fetch_and_process_latest_news(sentiment_filters):
    topic = "Top Headlines"
    articles = gather_news.fetch_articles_newsapi("top headlines")
    if isinstance(articles, str) or not articles:
        return sentiment_filters, "### No latest news available", "", "", "", "", None
    articles = sorted(articles, key=lambda x: x.get("publishedAt", ""), reverse=True)[:10]
    extracted_articles = extract_summarize_and_analyze_articles(articles)
    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None
    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(df, content_column="content", summary_column="summary")
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic)
    # Return shape mirrors the click handler's outputs: filter state,
    # five Markdown columns, and the CSV download.
    return sentiment_filters, *cluster_md_blocks, csv_file

def extract_summarize_and_analyze_articles(articles):
    # Articles without a URL, or whose extraction fails, are skipped
    # silently, so the result may hold fewer records than the input.
    extracted_articles = []
    for article in articles:
        url = article.get("url")
        if url:
            content, _ = extract_news.extract_full_content(url)
            if content:
                summary = summarizer.generate_summary(content)
                sentiment, score = analyze_sentiment.analyze_summary(summary)
                extracted_articles.append({
                    "title": article.get("title", "No title"),
                    "url": url,
                    "source": article.get("source", "Unknown"),
                    "author": article.get("author", "Unknown"),
                    "publishedAt": article.get("publishedAt", "Unknown"),
                    "content": content,
                    "summary": summary,
                    "sentiment": sentiment,
                    "score": score,
                })
    return extracted_articles
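
# Quick manual check of this stage (hypothetical topic; run in a REPL):
#   articles = fetch_content("climate change")
#   records = extract_summarize_and_analyze_articles(articles or [])
#   print(len(records), records[0]["sentiment"] if records else "no records")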

def extract_summarize_and_analyze_content_from_file(files):
    extracted_articles = []
    for file in files:
        # Gradio hands over temp-file objects whose .name holds the path on
        # disk (newer Gradio versions may pass plain path strings instead).
        with open(file.name, "r", encoding="utf-8") as f:
            content = f.read()
        if content.strip():
            summary = summarizer.generate_summary(content)
            sentiment, score = analyze_sentiment.analyze_summary(summary)
            extracted_articles.append({
                "title": "Custom File",
                "url": "N/A",
                "source": "Uploaded File",
                "author": "Unknown",
                "publishedAt": "Unknown",
                "content": content,
                "summary": summary,
                "sentiment": sentiment,
                "score": score,
            })
    return extracted_articles

def extract_summarize_and_analyze_content_from_urls(urls):
    extracted_articles = []
    for url in urls:
        content, title = extract_news.extract_full_content(url)
        if content:  # Only proceed if content is successfully extracted
            summary = summarizer.generate_summary(content)
            sentiment, score = analyze_sentiment.analyze_summary(summary)
            extracted_articles.append({
                "title": title or "Untitled Article",
                "url": url,
                "source": "External Link",
                "author": "Unknown",
                "publishedAt": "Unknown",
                "content": content,
                "summary": summary,
                "sentiment": sentiment,
                "score": score,
            })
    return extracted_articles

def display_clusters_as_columns(result, sentiment_filters=None):
    df = result["dataframe"]
    detected_topics = result.get("detected_topics", {})
    # Normalize labels ("positive" -> "Positive") so they match the
    # checkbox values used for filtering.
    df["sentiment"] = df["sentiment"].str.capitalize()
    if sentiment_filters:
        df = df[df["sentiment"].isin(sentiment_filters)]
    if df.empty:
        return ["### ⚠️ No matching articles."] + [""] * 4
    clusters = df.groupby("cluster_label")
    markdown_blocks = []
    for cluster_label, articles in clusters:
        cluster_md = f"### 🧩 Cluster {cluster_label}\n"
        if cluster_label in detected_topics:
            topics = detected_topics[cluster_label]
            cluster_md += f"**Primary Topic:** {topics['primary_focus']}\n\n"
            if topics["related_topics"]:
                cluster_md += f"**Related Topics:** {', '.join(topics['related_topics'])}\n\n"
        cluster_md += f"**Articles:** {len(articles)}\n\n"
        for _, article in articles.iterrows():
            cluster_md += (
                f"#### 📰 {article['title']}\n"
                f"- **Source:** {article['source']}\n"
                f"- **Sentiment:** {article['sentiment']}\n"
                f"<details><summary><strong>Summary</strong></summary>\n"
                f"{article['summary']}\n"
                f"</details>\n"
                f"- [Read Full Article]({article['url']})\n\n"
            )
        markdown_blocks.append(cluster_md)
    # Pad (or trim) to exactly five blocks, one per Markdown column.
    while len(markdown_blocks) < 5:
        markdown_blocks.append("")
    return markdown_blocks[:5]
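
# Only the first five clusters can be shown (the UI below has five fixed
# columns); any additional clusters from cluster_news are dropped by the
# [:5] slice above.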

def save_clustered_articles(df, topic):
    if df.empty:
        return None, None
    # Written to the current working directory; the second return value is
    # a placeholder kept for the callers' (csv_file, _) unpacking.
    csv_file = f"{topic.replace(' ', '_')}_clustered_articles.csv"
    df.to_csv(csv_file, index=False)
    return csv_file, None
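
# On hosted platforms where the working directory may not persist, a safer
# variant (a sketch, not in the original) would write to a temp directory:
#   import os, tempfile
#   csv_file = os.path.join(tempfile.gettempdir(),
#                           f"{topic.replace(' ', '_')}_clustered_articles.csv")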

# ------------------ Pipeline Trigger ------------------

def update_ui_with_columns(topic, files, urls, sentiment_filters):
    # Merge articles from all three input paths: topic search, uploaded
    # .txt files, and pasted URLs.
    extracted_articles = []
    if topic.strip():
        articles = fetch_content(topic)
        if articles:
            extracted_articles.extend(extract_summarize_and_analyze_articles(articles))
    if files:
        extracted_articles.extend(extract_summarize_and_analyze_content_from_file(files))
    if urls:
        url_list = [url.strip() for url in urls.split("\n") if url.strip()]
        extracted_articles.extend(extract_summarize_and_analyze_content_from_urls(url_list))
    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None
    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(df, content_column="content", summary_column="summary")
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic or "batch_upload")
    return sentiment_filters, *cluster_md_blocks, csv_file


def clear_interface():
    # Values must line up one-to-one with the outputs list wired to
    # clear_button.click below.
    return (
        "",                                   # topic_input
        ["Positive", "Neutral", "Negative"],  # sentiment_filter
        gr.update(value=None),                # uploaded_files (reset file upload)
        "",                                   # urls_input
        "", "", "", "", "",                   # cluster columns 0–4
        gr.update(value=None),                # csv_output (reset download file)
    )

# ------------------ Gradio UI ------------------

with gr.Blocks(theme=gr.themes.Base(), css=".gr-markdown { margin: 10px; }") as demo:
    # Header Section
    gr.Markdown("# 📰 Quick Pulse")
    gr.Markdown("### AI-Powered News Summarization with Real-Time Sentiment and Topic Insights")
    gr.Markdown(
        "From headlines to insight, Quick Pulse summarizes news stories, captures emotional "
        "context, and clusters related topics to provide structured intelligence, faster than ever."
    )

    # Input Section
    gr.Markdown("---")  # Horizontal line for separation
    with gr.Accordion("🗞️ Latest Top Headlines", open=False):
        latest_news_button = gr.Button("Fetch & Summarize Top 10 Headlines")
    with gr.Row():
        topic_input = gr.Textbox(label="Enter Topic", placeholder="e.g. climate change")
        sentiment_filter = gr.CheckboxGroup(
            choices=["Positive", "Neutral", "Negative"],
            value=["Positive", "Neutral", "Negative"],
            label="Sentiment Filter",
        )
        csv_output = gr.File(label="📁 Download Clustered Digest CSV")
    with gr.Accordion("📂 Upload Articles (.txt files)", open=False):
        uploaded_files = gr.File(label="Upload .txt Files", file_types=[".txt"], file_count="multiple")
    with gr.Accordion("🔗 Enter Multiple URLs", open=False):
        urls_input = gr.Textbox(label="Enter URLs (newline separated)", lines=4)
    with gr.Row():
        submit_button = gr.Button("Generate Digest")
        clear_button = gr.Button("Clear")
    with gr.Row():
        # Five fixed columns, one per cluster block.
        column_0 = gr.Markdown()
        column_1 = gr.Markdown()
        column_2 = gr.Markdown()
        column_3 = gr.Markdown()
        column_4 = gr.Markdown()

    submit_button.click(
        fn=update_ui_with_columns,
        inputs=[topic_input, uploaded_files, urls_input, sentiment_filter],
        outputs=[
            sentiment_filter,
            column_0, column_1, column_2, column_3, column_4,
            csv_output,
        ],
    )
    latest_news_button.click(
        fn=fetch_and_process_latest_news,
        inputs=[sentiment_filter],
        outputs=[
            sentiment_filter,
            column_0, column_1, column_2, column_3, column_4,
            csv_output,
        ],
    )
    clear_button.click(
        fn=clear_interface,
        inputs=[],
        outputs=[
            topic_input,       # 1
            sentiment_filter,  # 2
            uploaded_files,    # 3
            urls_input,        # 4
            column_0,          # 5
            column_1,          # 6
            column_2,          # 7
            column_3,          # 8
            column_4,          # 9
            csv_output,        # 10
        ],
    )

if __name__ == "__main__":
    demo.launch()
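
# For local debugging (not needed on Hugging Face Spaces, which calls
# launch() without arguments), launch() also accepts a bind address and
# port, e.g.:
#   demo.launch(server_name="0.0.0.0", server_port=7860)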