Spaces:
Running
Running
| import requests | |
| from bs4 import BeautifulSoup | |
| import ollama | |
| import json | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
class SentimentAnalyzer:
    """Rate stock-news sentiment with a local Ollama chat model.

    Short inputs (fewer than 250 characters) are scored with a headline
    prompt; longer inputs are scored with a full-article, short-term
    catalyst prompt. Every failure path degrades to a neutral rating of 3
    instead of raising.
    """

    # Rating reported whenever no trustworthy score is available.
    _NEUTRAL_RATING = 3
    # Inputs shorter than this many characters are treated as headlines.
    _HEADLINE_MAX_CHARS = 250

    def __init__(self):
        """
        Initialize the SentimentAnalyzer class using Ollama.
        """
        self.model = "llama3.1"

        # Prompt A: Short-Term Sentiment (Full Article)
        # Doubled braces are literal braces once str.format() has run.
        self.prompt_short_term = """
Role: Senior Momentum Trader & News Catalyst Analyst.
Task: Analyze the provided news for immediate (1-10 days) stock price impact and sentiment velocity.
Input Article:
{article_content}
Instructions:
1. Analyze the text for specific **Short-Term Catalysts**:
- Earnings Surprises (Beat/Miss) or Guidance Changes.
- Mergers/Acquisitions (M&A) rumors or confirmations.
- Regulatory approvals/denials or Lawsuits.
- Viral sentiment or Product launches.
2. Compare the news against **Market Expectations** (Is this priced in?).
3. Assign a Rating (1-5) based on probable **Price Action** in the next week.
Rating Scale:
1 = Strong Sell (Gap Down / Panic Selling likely)
2 = Bearish (Downward drift / Negative pressure)
3 = Neutral (No trade / Noise / Already priced in)
4 = Bullish (Upward momentum / Buying interest)
5 = Strong Buy (Gap Up / Short Squeeze / Heavy Volume likely)
Output Format (JSON only, no markdown):
{{
"rating": [1-5],
"sentiment_label": "[Bearish | Neutral | Bullish]",
"catalyst_type": "[Earnings | M&A | Macro | Product | Legal | Noise]",
"risk_level": "[High | Medium | Low]",
"reasoning": "[One sentence on why price will move NOW]"
}}
"""

        # Prompt B: Headline Fast-Track (Headline Only)
        self.prompt_headline = """
Role: High-Frequency Sentiment Engine.
Task: Classify the sentiment of the stock news headline immediately.
Headline: "{headline_text}"
Rules:
- Focus ONLY on the explicit sentiment of the headline.
- Ignore potential nuance; assume the market reacts knee-jerk to keywords.
- Output strictly valid JSON.
Scoring:
1 (Very Negative), 2 (Negative), 3 (Neutral), 4 (Positive), 5 (Very Positive).
Output Format:
{{
"score": [1-5],
"impact": "[High | Low]",
"key_phrase": "[Extract the most important 2-3 words]"
}}
"""

    @staticmethod
    def _coerce_rating(value, default=3):
        """Return `value` as an int in 1..5, or `default` when it is unusable."""
        # bool is an int subclass; True/False are never a meaningful rating.
        if isinstance(value, bool):
            return default
        try:
            # Going through float() also accepts "4" and 4.0 from the model.
            rating = int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default
        return rating if 1 <= rating <= 5 else default

    @staticmethod
    def _coerce_index(value, size):
        """Return `value` as a valid index into a sequence of `size`, else None."""
        if isinstance(value, bool):
            return None
        if isinstance(value, float) and not value.is_integer():
            return None
        try:
            index = int(value)
        except (TypeError, ValueError, OverflowError):
            return None
        return index if 0 <= index < size else None

    @staticmethod
    def _extract_title(item):
        """Return a news item's title from the flat or the nested `content` layout."""
        if not isinstance(item, dict):
            return ''
        title = item.get('title')
        if not title:
            nested = item.get('content')
            if isinstance(nested, dict):
                title = nested.get('title')
        return title or ''

    def _chat_json(self, prompt):
        """Send `prompt` to the model in JSON mode and return the decoded object.

        Raises:
            ValueError: If the reply is not valid JSON (json.JSONDecodeError
                is a ValueError subclass) or is not a JSON object.
        """
        response = ollama.chat(
            model=self.model,
            messages=[{'role': 'user', 'content': prompt}],
            format='json',
        )
        result = json.loads(response['message']['content'])
        if not isinstance(result, dict):
            raise ValueError("Model response is not a JSON object")
        return result

    def _fetch_article(self, url):
        """Download `url` and return `(text, error)`; exactly one of them is None."""
        # 1. Define "Real User" Headers to bypass 403 Forbidden
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.google.com/'
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()  # Raise error for bad status codes

            # 2. Parse Content
            soup = BeautifulSoup(response.text, 'html.parser')

            # Yahoo Finance specific selectors (these change occasionally);
            # fall back to the alternative layout when the first is absent.
            article_body = (
                soup.find('div', class_='caas-body')
                or soup.find('div', {'data-component': 'Content'})
            )
            if not article_body:
                return None, "Error: Could not parse article body layout."

            # Join the text of every paragraph inside the article container.
            paragraphs = article_body.find_all('p')
            return ' '.join(p.get_text() for p in paragraphs), None
        except Exception as e:
            # Best-effort boundary: any network or parsing failure is reported
            # to the caller as an error message instead of propagating.
            return None, f"Failed to fetch: {str(e)}"

    def fetch_article_content(self, url):
        """
        Fetch and extract content from a news article URL using smart headers and selectors.

        Args:
            url (str): URL of the article to download.

        Returns:
            str: The article's paragraph text joined by spaces. On failure a
            human-readable message starting with "Error:" or
            "Failed to fetch:" is returned instead.
        """
        text, error = self._fetch_article(url)
        return text if error is None else error

    def analyze_sentiment(self, url=None, content=None):
        """
        Analyze the sentiment of a news article using Ollama.
        Adapts prompt based on input length (Headline vs Full Article).

        Args:
            url (str, optional): URL of the article to analyze.
            content (str, optional): Content of the article to analyze directly.
                Takes precedence over `url` when both are given.

        Returns:
            tuple: A tuple containing (rating, analysis, impact_type/catalyst_type).
            `rating` is always an int in 1..5; 3 is used when the input is
            missing, the download fails, or the model reply is unusable.
        """
        neutral = self._NEUTRAL_RATING
        if not url and not content:
            return neutral, "No content or URL provided for analysis.", "Unknown"

        # Determine content source
        text_to_analyze = content
        if url and not content:
            text_to_analyze, fetch_error = self._fetch_article(url)
            if fetch_error is not None:
                # Never hand a download error message to the model as if it
                # were the article; report the failure directly.
                return neutral, fetch_error, "Unknown"
        if not text_to_analyze:
            return neutral, "Could not fetch article content", "Unknown"

        # Select Persona based on length
        is_headline = len(text_to_analyze) < self._HEADLINE_MAX_CHARS
        if is_headline:
            prompt = self.prompt_headline.format(headline_text=text_to_analyze)
        else:
            prompt = self.prompt_short_term.format(article_content=text_to_analyze)

        try:
            result = self._chat_json(prompt)
        except Exception as e:
            print(f"Error analyzing sentiment: {e}")
            return neutral, f"Error analyzing sentiment: {str(e)}", "Unknown"

        if is_headline:
            rating = self._coerce_rating(result.get('score'), neutral)
            analysis = f"Headline Analysis: {result.get('key_phrase', 'N/A')}"
            impact_type = result.get('impact', "Unknown")
        else:
            rating = self._coerce_rating(result.get('rating'), neutral)
            analysis = result.get('reasoning', "No analysis provided")
            impact_type = result.get('catalyst_type', "Unknown")
        return rating, analysis, impact_type

    def find_matching_news(self, target_headline, news_list):
        """
        Find the most relevant news item from a list that matches the target headline.

        Args:
            target_headline (str): The headline to match.
            news_list (list): List of news items (dictionaries) from yfinance.
                Titles are read from `item['title']`, falling back to
                `item['content']['title']`.

        Returns:
            dict: The matching news item, or None if no match is found, the
            list is empty, or the model call fails.
        """
        if not news_list:
            return None

        headlines = [self._extract_title(item) for item in news_list]
        prompt = f"""
Role: You are a news aggregator assistant.
Task: Identify if any of the provided news headlines refer to the same story as the target headline.
Target Headline: "{target_headline}"
Candidate Headlines:
{json.dumps(headlines, indent=2)}
Instructions:
1. Compare the Target Headline with the Candidate Headlines.
2. Determine if there is a match that discusses the same specific event or topic.
3. If a match is found, return the index of the matching headline (0-based).
4. If no match is found, return -1.
Output Format (JSON):
{{
"match_index": [Index or -1],
"reasoning": "[Brief explanation]"
}}
"""
        try:
            result = self._chat_json(prompt)
        except Exception as e:
            print(f"Error finding matching news: {e}")
            return None

        # The model may answer with "2", 2.0, -1 or garbage; only a valid
        # in-range integer selects an item.
        match_index = self._coerce_index(result.get('match_index', -1), len(news_list))
        if match_index is None:
            return None
        return news_list[match_index]
# Example usage: analyze the newest AAPL story when run as a script.
if __name__ == "__main__":
    import yfinance as yf

    # Get latest news for AAPL
    news = yf.Ticker("AAPL").news

    if not news:
        print("No news found for AAPL")
    else:
        latest_news = news[0]
        print(f"DEBUG: News object keys: {latest_news.keys()}")

        # yfinance has shipped more than one news layout: a flat one with
        # 'title'/'link' keys, and one nesting the fields under 'content'.
        title = latest_news.get('title')
        url = latest_news.get('link')
        if not title and 'content' in latest_news:
            payload = latest_news['content']
            title = payload.get('title')
            click_through = payload.get('clickThroughUrl')
            if click_through:
                url = click_through.get('url')

        print(f"Analyzing latest news: {title}")
        print(f"URL: {url}")

        if not url:
            print("No URL found in news item")
        else:
            analyzer = SentimentAnalyzer()
            print(analyzer.analyze_sentiment(url=url))