# Source: gotti_signal_gen/src/news_scraper/services/sentiment_analysis_gemini.py
# Repository page header: commit 3fe0726, "Deploy Signal Generator app" (author: Papaflessas)
import requests
from bs4 import BeautifulSoup
import ollama
import json
import os
from dotenv import load_dotenv
load_dotenv()
class SentimentAnalyzer:
    """Rate stock-news sentiment on a 1-5 scale using an Ollama chat model.

    Short inputs are scored with a headline prompt and longer inputs with a
    short-term catalyst prompt. The class can also ask the model which item
    of a news list covers the same story as a given headline.
    """

    # Browser-like headers sent with article requests to avoid 403 Forbidden.
    _REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://www.google.com/',
    }

    # Inputs shorter than this many characters are treated as a headline.
    HEADLINE_MAX_CHARS = 250

    # Rating reported whenever no usable rating is available.
    NEUTRAL_RATING = 3

    def __init__(self, model="llama3.1", request_timeout=10):
        """
        Initialize the SentimentAnalyzer class using Ollama.
        Args:
            model (str): Name of the Ollama model passed to ``ollama.chat``.
            request_timeout (float): Timeout in seconds for article downloads.
        """
        self.model = model
        self.request_timeout = request_timeout
        # Prompt A: Short-Term Sentiment (Full Article)
        self.prompt_short_term = """
Role: Senior Momentum Trader & News Catalyst Analyst.
Task: Analyze the provided news for immediate (1-10 days) stock price impact and sentiment velocity.
Input Article:
{article_content}
Instructions:
1. Analyze the text for specific **Short-Term Catalysts**:
- Earnings Surprises (Beat/Miss) or Guidance Changes.
- Mergers/Acquisitions (M&A) rumors or confirmations.
- Regulatory approvals/denials or Lawsuits.
- Viral sentiment or Product launches.
2. Compare the news against **Market Expectations** (Is this priced in?).
3. Assign a Rating (1-5) based on probable **Price Action** in the next week.
Rating Scale:
1 = Strong Sell (Gap Down / Panic Selling likely)
2 = Bearish (Downward drift / Negative pressure)
3 = Neutral (No trade / Noise / Already priced in)
4 = Bullish (Upward momentum / Buying interest)
5 = Strong Buy (Gap Up / Short Squeeze / Heavy Volume likely)
Output Format (JSON only, no markdown):
{{
"rating": [1-5],
"sentiment_label": "[Bearish | Neutral | Bullish]",
"catalyst_type": "[Earnings | M&A | Macro | Product | Legal | Noise]",
"risk_level": "[High | Medium | Low]",
"reasoning": "[One sentence on why price will move NOW]"
}}
"""
        # Prompt B: Headline Fast-Track (Headline Only)
        self.prompt_headline = """
Role: High-Frequency Sentiment Engine.
Task: Classify the sentiment of the stock news headline immediately.
Headline: "{headline_text}"
Rules:
- Focus ONLY on the explicit sentiment of the headline.
- Ignore potential nuance; assume the market reacts knee-jerk to keywords.
- Output strictly valid JSON.
Scoring:
1 (Very Negative), 2 (Negative), 3 (Neutral), 4 (Positive), 5 (Very Positive).
Output Format:
{{
"score": [1-5],
"impact": "[High | Low]",
"key_phrase": "[Extract the most important 2-3 words]"
}}
"""

    @staticmethod
    def _normalize_rating(value, default=3):
        """Coerce a model-supplied rating to an int clamped to 1..5.

        Values that cannot be read as a number (None, booleans, free text,
        NaN/Infinity) yield ``default``.
        """
        if isinstance(value, bool):
            return default
        try:
            rating = int(round(float(value)))
        except (TypeError, ValueError, OverflowError):
            return default
        return min(5, max(1, rating))

    @staticmethod
    def _normalize_index(value, size):
        """Return ``value`` as a valid index into ``size`` items, or None.

        Accepts ints, integer-valued floats and integer strings, because the
        model's JSON does not always carry the index as a plain integer.
        """
        if isinstance(value, bool):
            return None
        if isinstance(value, float) and not value.is_integer():
            return None
        try:
            index = int(value)
        except (TypeError, ValueError, OverflowError):
            return None
        return index if 0 <= index < size else None

    @staticmethod
    def _headline_of(item):
        """Return a news item's title from the flat or nested layout.

        Items may carry ``title`` at the top level or under ``content``;
        an empty string is returned when neither is present.
        """
        if not isinstance(item, dict):
            return ''
        title = item.get('title')
        if not title:
            nested = item.get('content')
            if isinstance(nested, dict):
                title = nested.get('title')
        return title or ''

    def _chat_json(self, prompt):
        """Send ``prompt`` as one user message and return the decoded JSON object.

        Raises:
            ValueError: If the reply decodes to something other than an object.
            Exception: Errors from ``ollama.chat`` or ``json.loads`` propagate.
        """
        response = ollama.chat(
            model=self.model,
            messages=[{'role': 'user', 'content': prompt}],
            format='json',
        )
        result = json.loads(response['message']['content'])
        if not isinstance(result, dict):
            raise ValueError(
                f"Expected a JSON object from the model, got {type(result).__name__}"
            )
        return result

    def _try_fetch_article(self, url):
        """Download ``url`` and extract its paragraph text.

        Returns:
            tuple: ``(text, None)`` on success or ``(None, error_message)`` on
            failure, so callers can tell article text from a failure message.
        """
        try:
            response = requests.get(
                url, headers=self._REQUEST_HEADERS, timeout=self.request_timeout
            )
            response.raise_for_status()  # Raise error for bad status codes
            soup = BeautifulSoup(response.text, 'html.parser')
            # Yahoo Finance specific selectors (these change occasionally)
            article_body = soup.find('div', class_='caas-body')
            if not article_body:
                # Fallback for different layouts
                article_body = soup.find('div', {'data-component': 'Content'})
            if not article_body:
                return None, "Error: Could not parse article body layout."
            text = ' '.join(p.get_text() for p in article_body.find_all('p'))
            return text, None
        except Exception as e:
            # Best-effort boundary: any download/parse problem becomes a message.
            return None, f"Failed to fetch: {str(e)}"

    def fetch_article_content(self, url):
        """
        Fetch and extract content from a news article URL using smart headers and selectors.
        Args:
            url (str): URL of the article to download.
        Returns:
            str: The joined paragraph text on success. On failure, a message
            starting with "Failed to fetch:" or "Error:" is returned instead
            (this method never raises).
        """
        text, error = self._try_fetch_article(url)
        return text if error is None else error

    def analyze_sentiment(self, url=None, content=None):
        """
        Analyze the sentiment of a news article using Ollama.
        Adapts prompt based on input length (Headline vs Full Article).
        Args:
            url (str, optional): URL of the article to analyze.
            content (str, optional): Content of the article to analyze directly.
        Returns:
            tuple: A tuple containing (rating, analysis, impact_type/catalyst_type).
            The rating is always an int in 1..5; failures yield a neutral 3
            with an explanatory analysis string and "Unknown".
        """
        if not url and not content:
            return self.NEUTRAL_RATING, "No content or URL provided for analysis.", "Unknown"

        # Determine content source
        text_to_analyze = content
        if url and not content:
            text_to_analyze, fetch_error = self._try_fetch_article(url)
            if fetch_error is not None:
                # Do not let a failure message be scored as if it were news.
                return (
                    self.NEUTRAL_RATING,
                    f"Could not fetch article content: {fetch_error}",
                    "Unknown",
                )

        if isinstance(text_to_analyze, str):
            text_to_analyze = text_to_analyze.strip()
        if not text_to_analyze:
            return self.NEUTRAL_RATING, "Could not fetch article content", "Unknown"

        # Select Persona based on length
        is_headline = len(text_to_analyze) < self.HEADLINE_MAX_CHARS
        if is_headline:
            prompt = self.prompt_headline.format(headline_text=text_to_analyze)
        else:
            prompt = self.prompt_short_term.format(article_content=text_to_analyze)

        try:
            result = self._chat_json(prompt)
        except Exception as e:
            print(f"Error analyzing sentiment: {e}")
            return self.NEUTRAL_RATING, f"Error analyzing sentiment: {str(e)}", "Unknown"

        if is_headline:
            rating = self._normalize_rating(result.get('score'), self.NEUTRAL_RATING)
            analysis = f"Headline Analysis: {result.get('key_phrase', 'N/A')}"
            impact_type = result.get('impact', "Unknown")
        else:
            rating = self._normalize_rating(result.get('rating'), self.NEUTRAL_RATING)
            analysis = result.get('reasoning', "No analysis provided")
            impact_type = result.get('catalyst_type', "Unknown")
        return rating, analysis, impact_type

    def find_matching_news(self, target_headline, news_list):
        """
        Find the most relevant news item from a list that matches the target headline.
        Args:
            target_headline (str): The headline to match.
            news_list (list): List of news items (dictionaries) from yfinance.
        Returns:
            dict: The matching news item, or None if no match is found or the
            model call fails.
        """
        if not news_list:
            return None

        headlines = [self._headline_of(item) for item in news_list]
        prompt = f"""
Role: You are a news aggregator assistant.
Task: Identify if any of the provided news headlines refer to the same story as the target headline.
Target Headline: "{target_headline}"
Candidate Headlines:
{json.dumps(headlines, indent=2)}
Instructions:
1. Compare the Target Headline with the Candidate Headlines.
2. Determine if there is a match that discusses the same specific event or topic.
3. If a match is found, return the index of the matching headline (0-based).
4. If no match is found, return -1.
Output Format (JSON):
{{
"match_index": [Index or -1],
"reasoning": "[Brief explanation]"
}}
"""
        try:
            result = self._chat_json(prompt)
        except Exception as e:
            print(f"Error finding matching news: {e}")
            return None

        match_index = self._normalize_index(
            result.get('match_index', -1), len(news_list)
        )
        if match_index is None:
            return None
        return news_list[match_index]
# Example usage: score the newest AAPL news item when run as a script.
if __name__ == "__main__":
    import yfinance as yf

    # Pull the current news list for AAPL.
    aapl_news = yf.Ticker("AAPL").news
    if not aapl_news:
        print("No news found for AAPL")
    else:
        newest = aapl_news[0]
        print(f"DEBUG: News object keys: {newest.keys()}")

        # Flat layout first; fall back to the nested 'content' layout.
        headline = newest.get('title')
        link = newest.get('link')
        if not headline and 'content' in newest:
            nested = newest['content']
            headline = nested.get('title')
            click_through = nested.get('clickThroughUrl')
            if click_through:
                link = click_through.get('url')

        print(f"Analyzing latest news: {headline}")
        print(f"URL: {link}")
        if not link:
            print("No URL found in news item")
        else:
            print(SentimentAnalyzer().analyze_sentiment(url=link))