Spaces:

Papaflessas
/

gotti_signal_gen

Running

App Files Files Community

gotti_signal_gen / src /news_scraper /services /sentiment_analysis_gemini.py

Papaflessas

Deploy Signal Generator app

3fe0726 8 days ago

raw

history blame contribute delete

9.54 kB

	import requests
	from bs4 import BeautifulSoup
	import ollama
	import json
	import os
	from dotenv import load_dotenv

	# Load key=value pairs from a local .env file into the process environment
	# (python-dotenv) at import time.
	# NOTE(review): nothing visible in this file reads environment variables —
	# confirm this call is still needed here.
	load_dotenv()

	class SentimentAnalyzer:
	    """Score news articles and headlines with a local Ollama chat model.

	    The analyzer can download an article, pick a prompt based on the text
	    length (headline vs. full article), and parse the model's JSON reply
	    into a ``(rating, analysis, impact_type)`` tuple. It can also ask the
	    model which item in a news list covers the same story as a headline.
	    """

	    # Texts shorter than this many characters are scored with the headline prompt.
	    HEADLINE_MAX_CHARS = 250
	    # Seconds to wait for an article download before giving up.
	    REQUEST_TIMEOUT = 10
	    # Rating returned whenever no usable score is available.
	    NEUTRAL_RATING = 3

	    def __init__(self, model="llama3.1"):
	        """
	        Initialize the SentimentAnalyzer class using Ollama.

	        Args:
	            model (str, optional): Name of the Ollama model used for every
	                request. Defaults to "llama3.1".
	        """
	        self.model = model

	        # Both templates are filled with str.format(), so literal JSON braces
	        # are doubled ({{ }}) and the option separators are plain "|"
	        # characters (a backslash before "|" is an invalid escape sequence).

	        # Prompt A: Short-Term Sentiment (Full Article)
	        self.prompt_short_term = """
	Role: Senior Momentum Trader & News Catalyst Analyst.
	Task: Analyze the provided news for immediate (1-10 days) stock price impact and sentiment velocity.

	Input Article:
	{article_content}

	Instructions:
	1. Analyze the text for specific Short-Term Catalysts:
	- Earnings Surprises (Beat/Miss) or Guidance Changes.
	- Mergers/Acquisitions (M&A) rumors or confirmations.
	- Regulatory approvals/denials or Lawsuits.
	- Viral sentiment or Product launches.
	2. Compare the news against Market Expectations (Is this priced in?).
	3. Assign a Rating (1-5) based on probable Price Action in the next week.

	Rating Scale:
	1 = Strong Sell (Gap Down / Panic Selling likely)
	2 = Bearish (Downward drift / Negative pressure)
	3 = Neutral (No trade / Noise / Already priced in)
	4 = Bullish (Upward momentum / Buying interest)
	5 = Strong Buy (Gap Up / Short Squeeze / Heavy Volume likely)

	Output Format (JSON only, no markdown):
	{{
	"rating": [1-5],
	"sentiment_label": "[Bearish | Neutral | Bullish]",
	"catalyst_type": "[Earnings | M&A | Macro | Product | Legal | Noise]",
	"risk_level": "[High | Medium | Low]",
	"reasoning": "[One sentence on why price will move NOW]"
	}}
	"""

	        # Prompt B: Headline Fast-Track (Headline Only)
	        self.prompt_headline = """
	Role: High-Frequency Sentiment Engine.
	Task: Classify the sentiment of the stock news headline immediately.

	Headline: "{headline_text}"

	Rules:
	- Focus ONLY on the explicit sentiment of the headline.
	- Ignore potential nuance; assume the market reacts knee-jerk to keywords.
	- Output strictly valid JSON.

	Scoring:
	1 (Very Negative), 2 (Negative), 3 (Neutral), 4 (Positive), 5 (Very Positive).

	Output Format:
	{{
	"score": [1-5],
	"impact": "[High | Low]",
	"key_phrase": "[Extract the most important 2-3 words]"
	}}
	"""

	    @staticmethod
	    def _coerce_rating(value, default=3):
	        """Turn a model-supplied rating into an int between 1 and 5.

	        Accepts ints, floats, numeric strings and single-element lists (the
	        prompt shows the value as ``[1-5]``, so a model may echo a list).
	        Anything that cannot be read as a finite number yields ``default``.
	        """
	        if isinstance(value, (list, tuple)) and len(value) == 1:
	            value = value[0]
	        if isinstance(value, bool):
	            return default
	        try:
	            rating = int(round(float(value)))
	        except (TypeError, ValueError, OverflowError):
	            return default
	        # Out-of-scale answers are clamped to the nearest end of the scale.
	        return max(1, min(5, rating))

	    @staticmethod
	    def _coerce_index(raw, size):
	        """Return ``raw`` as a valid 0-based index below ``size``, else None."""
	        if isinstance(raw, bool):
	            return None
	        if isinstance(raw, float) and not raw.is_integer():
	            return None
	        try:
	            index = int(raw)
	        except (TypeError, ValueError, OverflowError):
	            return None
	        return index if 0 <= index < size else None

	    @staticmethod
	    def _headline_of(item):
	        """Return a news item's title, looking under 'content' when needed."""
	        if not isinstance(item, dict):
	            return ''
	        title = item.get('title')
	        if not title:
	            nested = item.get('content')
	            if isinstance(nested, dict):
	                title = nested.get('title')
	        return title or ''

	    def _ask_model(self, prompt):
	        """Send one user prompt to Ollama in JSON mode and return the parsed object.

	        Raises:
	            ValueError: If the reply is valid JSON but not a JSON object.
	            Exception: Whatever ``ollama.chat`` or ``json.loads`` raises.
	        """
	        response = ollama.chat(
	            model=self.model,
	            messages=[{'role': 'user', 'content': prompt}],
	            format='json',
	        )
	        result = json.loads(response['message']['content'])
	        if not isinstance(result, dict):
	            raise ValueError(
	                f"Expected a JSON object from the model, got {type(result).__name__}"
	            )
	        return result

	    def _download_article(self, url):
	        """Download ``url`` and extract the article text.

	        Returns:
	            tuple: ``(text, error)``. On success ``error`` is None and
	            ``text`` is the joined paragraph text (possibly empty). On
	            failure ``text`` is None and ``error`` is a message string.
	        """
	        # Browser-like headers: some sites answer 403 Forbidden otherwise.
	        headers = {
	            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
	            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	            'Accept-Language': 'en-US,en;q=0.5',
	            'Referer': 'https://www.google.com/'
	        }

	        try:
	            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
	            response.raise_for_status()  # Raise error for bad status codes

	            soup = BeautifulSoup(response.text, 'html.parser')

	            # Yahoo Finance specific selectors (these change occasionally):
	            # try the main article body container first, then a fallback layout.
	            article_body = soup.find('div', class_='caas-body')
	            if article_body is None:
	                article_body = soup.find('div', {'data-component': 'Content'})
	            if article_body is None:
	                return None, "Error: Could not parse article body layout."

	            # Keep only paragraph text, joined with single spaces.
	            text = ' '.join(p.get_text() for p in article_body.find_all('p'))
	            return text, None
	        except Exception as e:
	            # Deliberately broad: fetching is best-effort and must never raise.
	            return None, f"Failed to fetch: {e}"

	    def fetch_article_content(self, url):
	        """
	        Fetch and extract content from a news article URL using smart headers and selectors.

	        Args:
	            url (str): URL of the article to download.

	        Returns:
	            str: The article text on success. On failure, a message starting
	            with "Failed to fetch: " or "Error: " (kept for callers that
	            rely on the string-only return value).
	        """
	        text, error = self._download_article(url)
	        return text if error is None else error

	    def analyze_sentiment(self, url=None, content=None):
	        """
	        Analyze the sentiment of a news article using Ollama.
	        Adapts prompt based on input length (Headline vs Full Article).

	        Args:
	            url (str, optional): URL of the article to analyze.
	            content (str, optional): Content of the article to analyze directly.
	                Takes precedence over ``url`` when both are given.

	        Returns:
	            tuple: A tuple containing (rating, analysis, impact_type/catalyst_type).
	            ``rating`` is an int from 1 to 5; it is 3 (neutral) when nothing
	            could be analyzed or the model reply was unusable.
	        """
	        neutral = self.NEUTRAL_RATING
	        if not url and not content:
	            return neutral, "No content or URL provided for analysis.", "Unknown"

	        # Determine content source
	        text_to_analyze = content
	        if not content:
	            text_to_analyze, fetch_error = self._download_article(url)
	            if fetch_error is not None:
	                print(f"Error fetching article content: {fetch_error}")
	            # A failed or empty download must stop here; otherwise the short
	            # error message would be scored as if it were a headline.
	            if not text_to_analyze:
	                return neutral, "Could not fetch article content", "Unknown"

	        # Select Persona based on length: short text is treated as a headline
	        is_headline = len(text_to_analyze) < self.HEADLINE_MAX_CHARS
	        if is_headline:
	            prompt = self.prompt_headline.format(headline_text=text_to_analyze)
	        else:
	            prompt = self.prompt_short_term.format(article_content=text_to_analyze)

	        try:
	            result = self._ask_model(prompt)
	        except Exception as e:
	            print(f"Error analyzing sentiment: {e}")
	            return neutral, f"Error analyzing sentiment: {str(e)}", "Unknown"

	        if is_headline:
	            rating = self._coerce_rating(result.get('score'), neutral)
	            analysis = f"Headline Analysis: {result.get('key_phrase', 'N/A')}"
	            impact_type = result.get('impact', "Unknown")
	        else:
	            rating = self._coerce_rating(result.get('rating'), neutral)
	            analysis = result.get('reasoning', "No analysis provided")
	            impact_type = result.get('catalyst_type', "Unknown")

	        return rating, analysis, impact_type

	    def find_matching_news(self, target_headline, news_list):
	        """
	        Find the most relevant news item from a list that matches the target headline.

	        Args:
	            target_headline (str): The headline to match.
	            news_list (list): List of news items (dictionaries) from yfinance.
	                The title is read from ``item['title']`` or, when that is
	                missing, from ``item['content']['title']``.

	        Returns:
	            dict: The matching news item, or None if no match is found or the
	            model call fails.
	        """
	        if not news_list:
	            return None

	        headlines = [self._headline_of(item) for item in news_list]

	        prompt = f"""
	Role: You are a news aggregator assistant.
	Task: Identify if any of the provided news headlines refer to the same story as the target headline.

	Target Headline: "{target_headline}"

	Candidate Headlines:
	{json.dumps(headlines, indent=2)}

	Instructions:
	1. Compare the Target Headline with the Candidate Headlines.
	2. Determine if there is a match that discusses the same specific event or topic.
	3. If a match is found, return the index of the matching headline (0-based).
	4. If no match is found, return -1.

	Output Format (JSON):
	{{
	"match_index": [Index or -1],
	"reasoning": "[Brief explanation]"
	}}
	"""

	        try:
	            result = self._ask_model(prompt)
	        except Exception as e:
	            print(f"Error finding matching news: {e}")
	            return None

	        # The model may answer with a string or float; only a real in-range
	        # integer index selects an item (-1 and anything else mean no match).
	        match_index = self._coerce_index(result.get('match_index', -1), len(news_list))
	        if match_index is None:
	            return None
	        return news_list[match_index]


	# Demo: when run as a script, score the first AAPL news item from yfinance
	if __name__ == "__main__":
	    import yfinance as yf

	    # Pull the current news feed for AAPL
	    aapl_news = yf.Ticker("AAPL").news

	    if not aapl_news:
	        print("No news found for AAPL")
	    else:
	        story = aapl_news[0]
	        print(f"DEBUG: News object keys: {story.keys()}")

	        # Some yfinance items are flat ('title' / 'link'); others nest the
	        # title and a click-through URL under a 'content' mapping.
	        headline, link = story.get('title'), story.get('link')
	        if not headline and 'content' in story:
	            nested = story['content']
	            headline = nested.get('title')
	            click_through = nested['clickThroughUrl'] if 'clickThroughUrl' in nested else None
	            if click_through:
	                link = click_through.get('url')

	        print(f"Analyzing latest news: {headline}")
	        print(f"URL: {link}")

	        if not link:
	            print("No URL found in news item")
	        else:
	            # Download the article and print the (rating, analysis, type) tuple
	            print(SentimentAnalyzer().analyze_sentiment(url=link))