Papaflessas committed
Commit 3fe0726 · 1 Parent(s): 6f60a15

Deploy Signal Generator app

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. Dockerfile +40 -0
  2. README.md +80 -1
  3. entrypoint.sh +26 -0
  4. requirements.txt +21 -0
  5. run_saturday_analysis.py +86 -0
  6. src/calendar_scraper/adapters/__init__.py +2 -0
  7. src/calendar_scraper/adapters/nasdaq.py +637 -0
  8. src/calendar_scraper/get_calendar_events.py +152 -0
  9. src/calendar_scraper/models/__init__.py +3 -0
  10. src/calendar_scraper/models/dividend_calendar.py +17 -0
  11. src/calendar_scraper/models/earnings_calendar.py +18 -0
  12. src/calendar_scraper/models/economic_calendar.py +20 -0
  13. src/calendar_scraper/models/ipo_calendar.py +16 -0
  14. src/calendar_scraper/models/stock_split_calendar.py +17 -0
  15. src/db/__init__.py +45 -0
  16. src/db/adapters.py +622 -0
  17. src/db/db_util.py +352 -0
  18. src/db/examples.py +453 -0
  19. src/db/get_event_from_db.py +49 -0
  20. src/db/isrgrootx1.pem +31 -0
  21. src/db/local_database.py +1217 -0
  22. src/db/migrate_local_to_tidb.py +132 -0
  23. src/db/verify_migration.py +22 -0
  24. src/fundamental_analysis/calculator.py +610 -0
  25. src/fundamental_analysis/data_fetcher.py +336 -0
  26. src/fundamental_analysis/decision_engine.py +542 -0
  27. src/fundamental_analysis/decision_maker.py +620 -0
  28. src/fundamental_analysis/financial_analyzer.py +382 -0
  29. src/fundamental_analysis/main.py +32 -0
  30. src/fundamental_analysis/metrics.py +616 -0
  31. src/fundamental_analysis/sector_analyzer.py +415 -0
  32. src/fundamental_analysis/test_analyzer.py +44 -0
  33. src/fundamental_analysis/valuation_engine.py +330 -0
  34. src/logs/news_sentiment_2025-11-10.xlsx +0 -0
  35. src/logs/news_sentiment_2025-11-15.xlsx +0 -0
  36. src/main.py +205 -0
  37. src/news_processing_performance.csv +37 -0
  38. src/news_scraper/adapters/__init__.py +2 -0
  39. src/news_scraper/adapters/alpaca_ws.py +195 -0
  40. src/news_scraper/adapters/base_adapter.py +10 -0
  41. src/news_scraper/adapters/bloomberg_adapter.py +19 -0
  42. src/news_scraper/adapters/motley_fool_adapter.py +19 -0
  43. src/news_scraper/adapters/yahoo_finance_adapter.py +44 -0
  44. src/news_scraper/helpers/news_db_logger.py +248 -0
  45. src/news_scraper/helpers/performance_logger.py +63 -0
  46. src/news_scraper/helpers/timer.py +138 -0
  47. src/news_scraper/interfaces/__init__.py +2 -0
  48. src/news_scraper/main.py +69 -0
  49. src/news_scraper/models/__init__.py +3 -0
  50. src/news_scraper/models/article.py +14 -0
Dockerfile ADDED
@@ -0,0 +1,40 @@
+ # Base image
+ FROM ubuntu:22.04
+
+ # Set working directory
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     python3 \
+     python3-pip \
+     curl \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install Ollama
+ RUN curl -fsSL https://ollama.com/install.sh | sh
+
+ # Copy necessary files
+ COPY requirements.txt .
+ COPY src/ src/
+ COPY run_saturday_analysis.py .
+ COPY entrypoint.sh .
+ # Copy any other dependencies that live at the repository root (e.g. setup.py or local modules).
+ # The src/ structure should cover most imports, but copy everything to be safe.
+ COPY . .
+
+ # Install Python dependencies
+ RUN pip3 install --no-cache-dir -r requirements.txt
+
+ # Create a non-root user (Hugging Face Spaces runs as user 1000)
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Set entrypoint
+ ENTRYPOINT ["./entrypoint.sh"]
README.md CHANGED
@@ -7,4 +7,83 @@ sdk: docker
  pinned: false
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Stock Alchemist Signal Generator
+
+ A Dockerized FastAPI application that generates stock trading signals using Ollama (LLaMA 3.1) and performs scheduled fundamental analysis. Designed to run on Hugging Face Spaces.
+
+ ## Features
+
+ - **Daily Signal Generation**: Prompts a local LLaMA 3.1 model to analyze stocks and generate trading signals.
+ - **Weekly Analysis**: Runs a comprehensive fundamental analysis script every Saturday.
+ - **Persistent Storage**: Stores results in a TiDB/MySQL database.
+ - **Automated Scheduling**: Uses GitHub Actions to trigger the daily and weekly tasks.
+
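+ The daily job drives the Ollama server that `entrypoint.sh` launches inside the container. As a minimal sketch of that interaction (illustrative only, not code from this commit; the real prompt construction and response parsing live in `src/main.py`), assuming Ollama's standard HTTP API on port 11434:
+
+ ```python
+ import requests
+
+ # Ask the local Ollama server for a non-streaming completion.
+ resp = requests.post(
+     "http://localhost:11434/api/generate",
+     json={
+         "model": "llama3.1",
+         "prompt": "Given today's headlines for AAPL, answer BUY, SELL, or HOLD.",
+         "stream": False,
+     },
+     timeout=120,
+ )
+ resp.raise_for_status()
+ print(resp.json()["response"])  # raw model output; the prompt above is hypothetical
+ ```
+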
+ ## Setup Instructions
+
+ ### 1. Hugging Face Space Setup
+
+ 1. Create a new Space on [Hugging Face](https://huggingface.co/spaces).
+ 2. Select **Docker** as the SDK.
+ 3. Choose the "Blank" template, or point the Space directly at your repository.
+
+ ### 2. Environment Variables
+
+ In your Hugging Face Space, under **Settings** -> **Variables and secrets**, add the following **Secret** variables:
+
+ - `API_SECRET`: A strong secret key to protect your endpoints.
+ - `DB_HOST`: Your TiDB/MySQL host.
+ - `DB_PORT`: Your TiDB/MySQL port (e.g., 4000 for TiDB, 3306 for MySQL).
+ - `DB_USERNAME`: Database username.
+ - `DB_PASSWORD`: Database password.
+ - `DB_DATABASE`: Database name (e.g., `gotti`).
+ - `DB_SSL_CA`: (Optional) Path to an SSL CA certificate, relative to the repository root.
+
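+ As a minimal sketch of how these variables reach the database layer (illustrative only; the actual wiring lives in `src/db/`, and `mysql-connector-python` is already in `requirements.txt`):
+
+ ```python
+ import os
+ import mysql.connector
+
+ # Build a connection from the same variables the Space injects at runtime.
+ config = {
+     "host": os.environ["DB_HOST"],
+     "port": int(os.getenv("DB_PORT", "4000")),
+     "user": os.environ["DB_USERNAME"],
+     "password": os.environ["DB_PASSWORD"],
+     "database": os.environ["DB_DATABASE"],
+ }
+ if os.getenv("DB_SSL_CA"):  # optional, e.g. src/db/isrgrootx1.pem for TiDB
+     config["ssl_ca"] = os.environ["DB_SSL_CA"]
+
+ conn = mysql.connector.connect(**config)
+ ```
+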
+ ### 3. GitHub Actions Setup
+
+ To enable the scheduler:
+
+ 1. Go to your GitHub repository -> **Settings** -> **Secrets and variables** -> **Actions**.
+ 2. Add the following **Repository secrets**:
+    - `HF_SPACE_URL`: The URL of your Hugging Face Space (e.g., `https://username-space-name.hf.space`).
+    - `API_SECRET`: The same secret you set in Hugging Face.
+
+ ### 4. Database Initialization
+
+ The application creates its tables automatically in `src/db/local_database.py`. Ensure your database user has `CREATE TABLE` permissions.
+
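+ For reference, the storage API used throughout this commit (see `src/db/adapters.py` and `run_saturday_analysis.py`) follows this pattern; a minimal sketch with placeholder values, assuming `src/` is on `sys.path` as the scripts arrange:
+
+ ```python
+ from db.local_database import LocalDatabase, DatabaseEntry, DataType
+
+ db = LocalDatabase()  # tables are created on first use
+ entry = DatabaseEntry(
+     date="2025-11-15",                      # YYYY-MM-DD
+     data_type=DataType.FUNDAMENTAL.value,
+     ticker="AAPL",
+     data={"recommendation": "HOLD"},        # arbitrary JSON payload
+     metadata={"source": "example"},
+ )
+ db.save(entry, expiry_days=7)
+
+ # Query the most recent fundamental entry for the same ticker.
+ latest = db.query(ticker="AAPL", data_type=DataType.FUNDAMENTAL.value, limit=1)
+ ```
+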
+ ## Local Development
+
+ ### Prerequisites
+
+ - Docker
+ - Python 3.10+
+ - Ollama (running locally if testing outside Docker)
+
+ ### Running with Docker
+
+ ```bash
+ # Build the image
+ docker build -t stock-alchemist .
+
+ # Run the container.
+ # Note: you need to pass the environment variables; for local testing, you may prefer --env-file.
+ docker run -p 7860:7860 \
+     -e API_SECRET=test_secret \
+     -e DB_HOST=host.docker.internal \
+     -e DB_USERNAME=root \
+     -e DB_PASSWORD=yourpassword \
+     stock-alchemist
+ ```
+
+ ### API Endpoints
+
+ - `POST /generate-signal`: Triggers signal generation. Send the secret in the `X-API-Secret` header; the server validates it against the `API_SECRET` environment variable.
+ - `POST /saturday-analysis`: Triggers the weekly analysis.
+ - `GET /health`: Health check and system vitals.
+
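+ Triggering an endpoint from a script (or from the GitHub Actions scheduler) is a single authenticated POST; a minimal sketch, assuming the `X-API-Secret` header documented above:
+
+ ```python
+ import os
+ import requests
+
+ base = os.environ["HF_SPACE_URL"]  # e.g. https://username-space-name.hf.space
+
+ # Kick off the daily signal run and wait for the response.
+ r = requests.post(
+     f"{base}/generate-signal",
+     headers={"X-API-Secret": os.environ["API_SECRET"]},
+     timeout=600,
+ )
+ print(r.status_code, r.text)
+ ```
+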
+ ## Project Structure
+
+ - `Dockerfile` & `entrypoint.sh`: System configuration.
+ - `src/main.py`: FastAPI application entry point.
+ - `src/db/`: Database interaction logic.
+ - `.github/workflows/scheduler.yml`: Automated scheduler.
entrypoint.sh ADDED
@@ -0,0 +1,26 @@
+ #!/bin/bash
+
+ # Start Ollama in the background
+ ollama serve &
+
+ # Wait for Ollama to be ready
+ echo "Waiting for Ollama to start..."
+ while ! curl -s http://localhost:11434/api/tags > /dev/null; do
+     sleep 1
+ done
+ echo "Ollama is ready!"
+
+ # Pull the model if it doesn't exist
+ # We use 'llama3.1' as requested.
+ MODEL_NAME="llama3.1"
+ if ! ollama list | grep -q "$MODEL_NAME"; then
+     echo "Pulling model $MODEL_NAME..."
+     ollama pull $MODEL_NAME
+ else
+     echo "Model $MODEL_NAME already exists."
+ fi
+
+ # Start the FastAPI application
+ # Using uvicorn on port 7860 (Hugging Face default)
+ echo "Starting FastAPI app..."
+ uvicorn src.main:app --host 0.0.0.0 --port 7860
requirements.txt ADDED
@@ -0,0 +1,21 @@
+ transformers
+ torch
+ accelerate
+ yfinance
+ sqlalchemy
+ selenium
+ xata
+ pandas
+ numpy
+ openpyxl
+ mysql-connector-python
+ google-generativeai
+ beautifulsoup4
+ requests
+ python-dotenv
+ alpaca-py
+ schedule
+ fastapi
+ uvicorn
+ pydantic
+ openai
run_saturday_analysis.py ADDED
@@ -0,0 +1,86 @@
+ """
+ Saturday Fundamental Analysis Runner
+ Runs fundamental analysis for all tickers in the available_tickers table
+ """
+
+ import sys
+ from pathlib import Path
+ from datetime import datetime
+
+ # Add src to path
+ sys.path.insert(0, str(Path(__file__).parent / "src"))
+
+ from db.local_database import LocalDatabase, DatabaseEntry, DataType
+ from fundamental_analysis.decision_maker import evaluate_stock
+
+ def run_saturday_analysis():
+     """Run fundamental analysis for all available tickers"""
+
+     print("=" * 60)
+     print("📊 SATURDAY FUNDAMENTAL ANALYSIS")
+     print("=" * 60)
+     print(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+
+     # Initialize database
+     db = LocalDatabase()
+
+     # Get all available tickers
+     tickers = db.get_all_available_tickers()
+     total = len(tickers)
+
+     if total == 0:
+         print("⚠️ No tickers found in available_tickers table")
+         print("Run: python src/db/populate_tickers.py alpaca")
+         return
+
+     print(f"📋 Found {total} tickers to analyze\n")
+
+     success_count = 0
+     error_count = 0
+     skipped_count = 0
+
+     for i, ticker in enumerate(tickers, 1):
+         try:
+             print(f"[{i}/{total}] Analyzing {ticker}...", end=" ")
+
+             # Run fundamental analysis
+             result = evaluate_stock(ticker, compare_to_sector=False)
+
+             if result is None:
+                 print("⚠️ Skipped (no data)")
+                 skipped_count += 1
+                 continue
+
+             # Save to database
+             today = datetime.now().date().isoformat()
+             entry = DatabaseEntry(
+                 date=today,
+                 data_type=DataType.FUNDAMENTAL.value,
+                 ticker=ticker,
+                 data=result,
+                 metadata={'saturday_batch': True}
+             )
+
+             if db.save(entry, expiry_days=7):
+                 print(f"✅ {result.get('recommendation', 'HOLD')}")
+                 success_count += 1
+             else:
+                 print("❌ Save failed")
+                 error_count += 1
+
+         except Exception as e:
+             print(f"❌ Error: {e}")
+             error_count += 1
+
+     print("\n" + "=" * 60)
+     print("📊 ANALYSIS COMPLETE")
+     print("=" * 60)
+     print(f"✅ Success: {success_count}")
+     print(f"⚠️ Skipped: {skipped_count}")
+     print(f"❌ Errors: {error_count}")
+     print(f"📊 Total: {total}")
+     print(f"\nCompleted at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+     print("=" * 60)
+
+ if __name__ == "__main__":
+     run_saturday_analysis()
src/calendar_scraper/adapters/__init__.py ADDED
@@ -0,0 +1,2 @@
+ # FILE: /calendar-scraper/adapters/__init__.py
+ # This file is intentionally left blank.
src/calendar_scraper/adapters/nasdaq.py ADDED
@@ -0,0 +1,637 @@
+ import requests
+ from datetime import datetime, date, timedelta
+ import sys
+ from pathlib import Path
+
+ # Add src to path for database import
+ sys.path.append(str(Path(__file__).parent.parent.parent))
+
+ from calendar_scraper.models.earnings_calendar import EarningsEvent
+ from calendar_scraper.models.economic_calendar import EconomicEvent
+ from calendar_scraper.models.ipo_calendar import IPOEvent
+ from calendar_scraper.models.stock_split_calendar import StockSplitEvent
+ from calendar_scraper.models.dividend_calendar import DividendEvent
+ from db.adapters import CalendarAdapter
+
+ class NasdaqAdapter():
+
+     def __init__(self):
+         self.base_earnings_url = "https://api.nasdaq.com/api/calendar/earnings"
+         self.base_ipo_url = "https://api.nasdaq.com/api/ipo/calendar"
+         self.base_splits_url = "https://api.nasdaq.com/api/calendar/splits"
+         self.base_economic_url = "https://api.nasdaq.com/api/calendar/economicevents"
+         self.base_dividends_url = "https://api.nasdaq.com/api/calendar/dividends"
+
+         self.headers = {
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+             "Accept": "application/json, text/plain, */*",
+             "Referer": "https://www.nasdaq.com/market-activity/earnings",
+             "Origin": "https://www.nasdaq.com"
+         }
+         # Initialize database adapter
+         self.db_adapter = CalendarAdapter()
+
+     def get_earning_events(self):
+         """Get earnings calendar data from NASDAQ API"""
+         try:
+             # Make request to NASDAQ API
+             response = requests.get(self.base_earnings_url, headers=self.headers)
+             response.raise_for_status()
+
+             # Parse JSON response
+             data = response.json()
+
+             # Verify response structure
+             if not data or 'data' not in data or 'rows' not in data['data']:
+                 print("Invalid response format from NASDAQ API")
+                 return []
+
+             # Extract date from response
+             as_of_date_str = data['data'].get('asOf', '')
+             try:
+                 as_of_date = datetime.strptime(as_of_date_str, '%a, %b %d, %Y').date()
+             except ValueError:
+                 as_of_date = date.today()
+
+             # Parse rows into earnings events
+             events = []
+             for row in data['data']['rows']:
+                 try:
+                     event = self._parse_row_to_event(row, as_of_date)
+                     if event:
+                         events.append(event)
+                 except Exception as e:
+                     print(f"Error parsing row {row['symbol'] if 'symbol' in row else 'unknown'}: {e}")
+
+             print(f"Found {len(events)} earnings events from NASDAQ")
+             return events
+
+         except requests.RequestException as e:
+             print(f"Error fetching data from NASDAQ API: {e}")
+             return []
+         except Exception as e:
+             print(f"Unexpected error: {e}")
+             return []
+
+     def _parse_row_to_event(self, row, report_date):
+         """Convert a row from the NASDAQ API response to an EarningsEvent object"""
+         # Extract basic information
+         ticker = row.get('symbol', '')
+         company = row.get('name', '')
+
+         # Skip if missing essential data
+         if not ticker or not company:
+             return None
+
+         # Parse time of day
+         time_str = row.get('time', '').lower()
+         if 'after' in time_str or 'post' in time_str:
+             time_of_day = 'After Close'
+         elif 'pre' in time_str or 'before' in time_str:
+             time_of_day = 'Before Open'
+         elif 'not-supplied' in time_str:
+             time_of_day = 'Unknown'
+         else:
+             time_of_day = 'During Market'
+
+         # Extract EPS information
+         eps_forecast = row.get('epsForecast', '')
+         last_year_eps = row.get('lastYearEPS', '')
+
+         # Combine EPS data
+         eps_info = []
+         if eps_forecast:
+             eps_info.append(f"Forecast: {eps_forecast}")
+         if last_year_eps:
+             eps_info.append(f"Last Year: {last_year_eps}")
+
+         eps = " | ".join(eps_info)
+
+         # Clean market cap value - remove $ and commas
+         market_cap = row.get('marketCap', '')
+         if market_cap and isinstance(market_cap, str):
+             # Remove $ and commas, then convert to float
+             market_cap = market_cap.replace('$', '').replace(',', '')
+             if market_cap.strip():  # Check if it's not just whitespace
+                 try:
+                     market_cap = float(market_cap)
+                 except ValueError:
+                     market_cap = None
+
+         # Create earnings event object
+         event = EarningsEvent(
+             date=report_date,
+             time=time_of_day,
+             company=company,
+             ticker=ticker,
+             eps=eps,
+             revenue="",
+             market_cap=market_cap
+         )
+
+         try:
+             self.db_adapter.save_earnings_event(
+                 date=report_date.strftime("%Y-%m-%d"),
+                 ticker=ticker,
+                 event_data={
+                     "company": company,
+                     "time": time_of_day,
+                     "eps": eps,
+                     "revenue": "",
+                     "market_cap": market_cap,
+                     "eps_forecast": eps_forecast,
+                     "last_year_eps": last_year_eps
+                 }
+             )
+             # print(f"✅ Saved {ticker} earnings event to database for {report_date.strftime('%Y-%m-%d')}")
+         except Exception as e:
+             print(f"⚠️ Failed to save {ticker} to database: {e}")
+
+         return event
+
+     # ==================== IPO EVENTS ====================
+
+     def get_ipo_events(self, date_str: str = None) -> list:
+         """
+         Get IPO calendar data from NASDAQ API
+
+         Args:
+             date_str: Date in format YYYY-MM (e.g., "2025-11")
+
+         Returns:
+             List of IPOEvent objects
+         """
+         if not date_str:
+             date_str = datetime.now().strftime("%Y-%m")
+
+         try:
+             url = f"{self.base_ipo_url}?date={date_str}"
+             response = requests.get(url, headers=self.headers)
+             response.raise_for_status()
+
+             data = response.json()
+
+             if not data or 'data' not in data:
+                 print("Invalid response format from NASDAQ IPO API")
+                 return []
+
+             events = []
+
+             # Parse priced IPOs
+             if 'priced' in data['data'] and 'rows' in data['data']['priced']:
+                 for row in data['data']['priced']['rows']:
+                     event = self._parse_ipo_row(row, 'priced')
+                     if event:
+                         events.append(event)
+
+             # Parse upcoming IPOs (nested under upcomingTable)
+             if 'upcoming' in data['data'] and data['data']['upcoming']:
+                 upcoming_data = data['data']['upcoming']
+                 if 'upcomingTable' in upcoming_data and upcoming_data['upcomingTable']:
+                     upcoming_table = upcoming_data['upcomingTable']
+                     if 'rows' in upcoming_table and upcoming_table['rows']:
+                         for row in upcoming_table['rows']:
+                             event = self._parse_ipo_row(row, 'upcoming')
+                             if event:
+                                 events.append(event)
+
+             # Parse filed IPOs
+             if 'filed' in data['data'] and data['data']['filed']:
+                 filed_data = data['data']['filed']
+                 # Check if rows are directly under filed or nested
+                 if 'rows' in filed_data and filed_data['rows']:
+                     for row in filed_data['rows']:
+                         event = self._parse_ipo_row(row, 'filed')
+                         if event:
+                             events.append(event)
+
+             print(f"Found {len(events)} IPO events from NASDAQ")
+             return events
+
+         except requests.RequestException as e:
+             print(f"Error fetching IPO data from NASDAQ API: {e}")
+             return []
+         except Exception as e:
+             print(f"Unexpected error in get_ipo_events: {e}")
+             return []
+
+     def _parse_ipo_row(self, row, status):
+         """Parse IPO row data"""
+         try:
+             company = row.get('companyName', '')
+             ticker = row.get('proposedTickerSymbol', '')
+             exchange = row.get('proposedExchange', '')
+
+             if not company or not ticker:
+                 return None
+
+             # Parse date
+             date_str = row.get('expectedPriceDate', '') or row.get('pricedDate', '')
+             if date_str:
+                 try:
+                     event_date = datetime.strptime(date_str, '%m/%d/%Y').date()
+                 except:
+                     event_date = datetime.now().date()
+             else:
+                 event_date = datetime.now().date()
+
+             # Get shares and clean if string (remove commas)
+             shares_raw = row.get('sharesOffered', '')
+             shares = 0
+             if shares_raw:
+                 try:
+                     # Remove commas and convert to int
+                     shares = int(str(shares_raw).replace(',', ''))
+                 except:
+                     shares = 0
+
+             price_range = row.get('proposedSharePrice', '')
+             offer_amount = row.get('dollarValueOfSharesOffered', '')
+             deal_status = row.get('dealStatus', status)
+
+             # Calculate market cap from offer amount or price range
+             market_cap = None
+             try:
+                 # Try to get from offer amount first
+                 if offer_amount:
+                     # Remove $, commas, and convert (e.g., "$1,014,999,994")
+                     offer_clean = str(offer_amount).replace('$', '').replace(',', '')
+                     market_cap = float(offer_clean)
+                 elif shares and price_range:
+                     # Extract average price from range (e.g., "4.00-6.00")
+                     if '-' in str(price_range):
+                         prices = str(price_range).replace('$', '').split('-')
+                         avg_price = (float(prices[0].strip()) + float(prices[1].strip())) / 2
+                         market_cap = shares * avg_price
+                     elif price_range:
+                         # Single price
+                         price = float(str(price_range).replace('$', ''))
+                         market_cap = shares * price
+             except:
+                 pass
+
+             event = IPOEvent(
+                 date=event_date,
+                 company=company,
+                 ticker=ticker,
+                 exchange=exchange,
+                 shares=str(shares) if shares else "",
+                 price_range=str(price_range) if price_range else "",
+                 market_cap=market_cap,
+                 expected_to_trade=deal_status
+             )
+
+             # Save to database
+             try:
+                 from db.local_database import DatabaseEntry, DataType
+                 entry = DatabaseEntry(
+                     date=event_date.strftime("%Y-%m-%d"),
+                     data_type=DataType.IPO.value,
+                     ticker=ticker,
+                     data={
+                         "event_type": "ipo",
+                         "status": deal_status,
+                         "company": company,
+                         "exchange": exchange,
+                         "shares": str(shares) if shares else "",
+                         "price_range": str(price_range) if price_range else "",
+                         "market_cap": market_cap,
+                         "offer_amount": offer_amount
+                     },
+                     metadata={
+                         "source": "calendar_scraper",
+                         "scraper": "nasdaq_ipo"
+                     }
+                 )
+                 self.db_adapter.db.save(entry, expiry_days=90)
+                 # print(f"✅ Saved {ticker} IPO event to database")
+             except Exception as e:
+                 print(f"⚠️ Failed to save {ticker} IPO to database: {e}")
+
+             return event
+
+         except Exception as e:
+             print(f"Error parsing IPO row: {e}")
+             return None
+
+     # ==================== STOCK SPLIT EVENTS ====================
+
+     def get_stock_split_events(self, date_str: str = None) -> list:
+         """
+         Get stock split calendar data from NASDAQ API
+
+         Args:
+             date_str: Date in format YYYY-MM-DD (optional)
+
+         Returns:
+             List of StockSplitEvent objects
+         """
+         try:
+             url = self.base_splits_url
+             if date_str:
+                 url = f"{url}?date={date_str}"
+
+             response = requests.get(url, headers=self.headers)
+             response.raise_for_status()
+
+             data = response.json()
+
+             if not data or 'data' not in data or 'rows' not in data['data']:
+                 print("Invalid response format from NASDAQ Stock Split API")
+                 return []
+
+             events = []
+             for row in data['data']['rows']:
+                 event = self._parse_stock_split_row(row)
+                 if event:
+                     events.append(event)
+
+             print(f"Found {len(events)} stock split events from NASDAQ")
+             return events
+
+         except requests.RequestException as e:
+             print(f"Error fetching stock split data from NASDAQ API: {e}")
+             return []
+         except Exception as e:
+             print(f"Unexpected error in get_stock_split_events: {e}")
+             return []
+
+     def _parse_stock_split_row(self, row):
+         """Parse stock split row data"""
+         try:
+             # API returns: symbol, name, ratio, executionDate
+             company = row.get('name', '')
+             ticker = row.get('symbol', '')
+             ratio = row.get('ratio', '')
+
+             if not ticker or not ratio:
+                 return None
+
+             # Parse execution date (effective date)
+             ex_date_str = row.get('executionDate', '')
+             if ex_date_str:
+                 try:
+                     ex_date = datetime.strptime(ex_date_str, '%m/%d/%Y').date()
+                 except:
+                     ex_date = datetime.now().date()
+             else:
+                 ex_date = datetime.now().date()
+
+             event = StockSplitEvent(
+                 date=ex_date,
+                 company=company,
+                 ticker=ticker,
+                 ratio=ratio,
+                 option_symbol="",
+                 announcement_date=None,
+                 ex_date=ex_date
+             )
+
+             # Save to database
+             try:
+                 from db.local_database import DatabaseEntry, DataType
+                 entry = DatabaseEntry(
+                     date=ex_date.strftime("%Y-%m-%d"),
+                     data_type=DataType.STOCK_SPLIT.value,
+                     ticker=ticker,
+                     data={
+                         "event_type": "stock_split",
+                         "company": company,
+                         "ratio": ratio,
+                         "execution_date": ex_date.strftime("%Y-%m-%d")
+                     },
+                     metadata={
+                         "source": "calendar_scraper",
+                         "scraper": "nasdaq_splits"
+                     }
+                 )
+                 self.db_adapter.db.save(entry, expiry_days=90)
+                 # print(f"✅ Saved {ticker} stock split event to database")
+             except Exception as e:
+                 print(f"⚠️ Failed to save {ticker} stock split to database: {e}")
+
+             return event
+
+         except Exception as e:
+             print(f"Error parsing stock split row: {e}")
+             return None
+
+     # ==================== ECONOMIC EVENTS ====================
+
+     def get_economic_events(self, date_str: str = None, country_list: list[str] = ['UNITED_STATES', 'United States']) -> list:
+         """
+         Get economic events calendar data from NASDAQ API
+
+         Args:
+             date_str: Date in format YYYY-MM-DD (e.g., "2025-11-06")
+
+         Returns:
+             List of EconomicEvent objects
+         """
+         if not date_str:
+             date_str = datetime.now().strftime("%Y-%m-%d")
+
+         try:
+             url = f"{self.base_economic_url}?date={date_str}"
+             response = requests.get(url, headers=self.headers)
+             response.raise_for_status()
+
+             data = response.json()
+
+             if not data or 'data' not in data or 'rows' not in data['data']:
+                 print("Invalid response format from NASDAQ Economic Events API")
+                 return []
+
+             events = []
+             for row in data['data']['rows']:
+                 event = self._parse_economic_event_row(row, date_str, country_list)
+                 if event:
+                     events.append(event)
+
+             print(f"Found {len(events)} economic events from NASDAQ")
+             return events
+
+         except requests.RequestException as e:
+             print(f"Error fetching economic events data from NASDAQ API: {e}")
+             return []
+         except Exception as e:
+             print(f"Unexpected error in get_economic_events: {e}")
+             return []
+
+     def _parse_economic_event_row(self, row, date_str, country_list):
+         """Parse economic event row data"""
+         try:
+             event_name = row.get('eventName', '')
+             country = row.get('country', 'US')
+
+             if not event_name:
+                 return None
+             if event_name.lower() in ['n/a', 'not available']:
+                 return None
+             if country not in country_list:
+                 return None
+             # Parse event date
+             try:
+                 event_date = datetime.strptime(date_str, '%Y-%m-%d').date()
+             except:
+                 event_date = datetime.now().date()
+
+             time = row.get('time', '')
+             actual = row.get('actual', '')
+             consensus = row.get('consensus', '')
+             previous = row.get('previous', '')
+
+             event = EconomicEvent(
+                 date=event_date,
+                 time=time,
+                 country=country,
+                 importance=row.get('importance', 0),
+                 event=event_name,
+                 actual=actual,
+                 forecast=consensus,
+                 previous=previous
+             )
+
+             # Save to database
+             try:
+                 self.db_adapter.save_economic_event(
+                     date=event_date.strftime("%Y-%m-%d"),
+                     event_data={
+                         "country": country,
+                         "time": time,
+                         "event": event_name,
+                         "actual": actual,
+                         "forecast": consensus,
+                         "previous": previous
+                     }
+                 )
+                 # print(f"✅ Saved {event_name} economic event to database")
+             except Exception as e:
+                 print(f"⚠️ Failed to save {event_name} economic event to database: {e}")
+
+             return event
+
+         except Exception as e:
+             print(f"Error parsing economic event row: {e}")
+             return None
+
+     # ==================== DIVIDEND EVENTS ====================
+
+     def get_dividend_events(self, date_str: str = None) -> list:
+         """
+         Get dividend calendar data from NASDAQ API
+
+         Args:
+             date_str: Date in format YYYY-MM-DD (e.g., "2025-11-06")
+
+         Returns:
+             List of DividendEvent objects
+         """
+         if not date_str:
+             date_str = datetime.now().strftime("%Y-%m-%d")
+
+         try:
+             url = f"{self.base_dividends_url}?date={date_str}"
+             response = requests.get(url, headers=self.headers)
+             response.raise_for_status()
+
+             data = response.json()
+
+             if not data or 'data' not in data or 'calendar' not in data['data']:
+                 print("Invalid response format from NASDAQ Dividends API")
+                 return []
+
+             calendar_data = data['data']['calendar']
+             if 'rows' not in calendar_data:
+                 print("No rows in calendar data")
+                 return []
+
+             events = []
+             for row in calendar_data['rows']:
+                 event = self._parse_dividend_row(row, date_str)
+                 if event:
+                     events.append(event)
+
+             print(f"Found {len(events)} dividend events from NASDAQ")
+             return events
+
+         except requests.RequestException as e:
+             print(f"Error fetching dividend data from NASDAQ API: {e}")
+             return []
+         except Exception as e:
+             print(f"Unexpected error in get_dividend_events: {e}")
+             return []
+
+     def _parse_dividend_row(self, row, date_str):
+         """Parse dividend row data"""
+         try:
+             # API returns: companyName, symbol, dividend_Ex_Date, payment_Date, record_Date,
+             # dividend_Rate, indicated_Annual_Dividend, announcement_Date
+             company = row.get('companyName', '')
+             ticker = row.get('symbol', '')
+             dividend_rate = row.get('dividend_Rate', 0)
+
+             if not company or not ticker:
+                 return None
+
+             # Use ex-dividend date as the event date
+             ex_date_str = row.get('dividend_Ex_Date', '')
+             if ex_date_str:
+                 try:
+                     event_date = datetime.strptime(ex_date_str, '%m/%d/%Y').date()
+                 except:
+                     event_date = datetime.now().date()
+             else:
+                 event_date = datetime.now().date()
+
+             annual_dividend = row.get('indicated_Annual_Dividend', '')
+             ex_date = row.get('dividend_Ex_Date', '')
+             record_date = row.get('record_Date', '')
+             payment_date = row.get('payment_Date', '')
+             announcement_date = row.get('announcement_Date', '')
+
+             event = DividendEvent(
+                 date=event_date,
+                 company=company,
+                 ticker=ticker,
+                 dividend_rate=str(dividend_rate),
+                 annual_dividend=str(annual_dividend) if annual_dividend else "",
+                 ex_date=ex_date,
+                 record_date=record_date,
+                 payment_date=payment_date,
+                 announcement_date=announcement_date
+             )
+
+             # Save to database
+             try:
+                 from db.local_database import DatabaseEntry, DataType
+                 entry = DatabaseEntry(
+                     date=event_date.strftime("%Y-%m-%d"),
+                     data_type=DataType.DIVIDENDS.value,
+                     ticker=ticker,
+                     data={
+                         "event_type": "dividend",
+                         "company": company,
+                         "dividend_rate": str(dividend_rate),
+                         "annual_dividend": str(annual_dividend) if annual_dividend else "",
+                         "ex_date": ex_date,
+                         "record_date": record_date,
+                         "payment_date": payment_date,
+                         "announcement_date": announcement_date
+                     },
+                     metadata={
+                         "source": "calendar_scraper",
+                         "scraper": "nasdaq_dividends"
+                     }
+                 )
+                 self.db_adapter.db.save(entry, expiry_days=90)
+                 # print(f"✅ Saved {ticker} dividend event to database")
+             except Exception as e:
+                 print(f"⚠️ Failed to save {ticker} dividend to database: {e}")
+
+             return event
+
+         except Exception as e:
+             print(f"Error parsing dividend row: {e}")
+             return None
+
src/calendar_scraper/get_calendar_events.py ADDED
@@ -0,0 +1,152 @@
+ from datetime import date
+ import sys
+ from pathlib import Path
+
+ # Add src to path if needed
+ sys.path.append(str(Path(__file__).parent.parent))
+
+ from calendar_scraper.adapters.nasdaq import NasdaqAdapter
+ from db.local_database import LocalDatabase, DatabaseEntry, DataType
+
+ def get_events(_date: date):
+     adapter = NasdaqAdapter()
+     earnings = adapter.get_earning_events()
+     ipos = adapter.get_ipo_events(_date.strftime("%Y-%m"))
+     splits = adapter.get_stock_split_events(_date.strftime("%Y-%m-%d"))
+     economic = adapter.get_economic_events(_date.strftime("%Y-%m-%d"))
+     dividends = adapter.get_dividend_events(_date.strftime("%Y-%m-%d"))
+     return {
+         "earnings": earnings,
+         "ipos": ipos,
+         "splits": splits,
+         "economic": economic,
+         "dividends": dividends
+     }
+
+ def _map_event_to_entry(event, event_type: str, _date: date) -> DatabaseEntry:
+     """Map a calendar event object to a DatabaseEntry"""
+
+     if event_type == "earnings":
+         # EarningsEvent(date, time, company, ticker, eps, revenue, market_cap)
+         return DatabaseEntry(
+             date=_date.isoformat(),
+             data_type=DataType.EARNINGS.value,
+             ticker=event.ticker,
+             data={
+                 'event_type': 'earnings',
+                 'time': event.time,
+                 'company': event.company,
+                 'eps': event.eps,
+                 'revenue': event.revenue,
+                 'market_cap': event.market_cap,
+                 'execution_date': event.date  # Store the actual event date
+             },
+             metadata={'source': 'nasdaq_calendar'}
+         )
+     elif event_type == "ipos":
+         # IPOEvent(date, company, ticker, exchange, shares, price_range, market_cap, expected_to_trade)
+         return DatabaseEntry(
+             date=_date.isoformat(),
+             data_type=DataType.IPO.value,
+             ticker=event.ticker,
+             data={
+                 'event_type': 'ipo',
+                 'company': event.company,
+                 'exchange': event.exchange,
+                 'shares': event.shares,
+                 'price_range': event.price_range,
+                 'market_cap': event.market_cap,
+                 'expected_to_trade': event.expected_to_trade,
+                 'execution_date': str(event.date)
+             },
+             metadata={'source': 'nasdaq_calendar'}
+         )
+     elif event_type == "splits":
+         # StockSplitEvent(date, company, ticker, ratio, ...)
+         return DatabaseEntry(
+             date=_date.isoformat(),
+             data_type=DataType.STOCK_SPLIT.value,
+             ticker=event.ticker,
+             data={
+                 'event_type': 'stock_split',
+                 'company': event.company,
+                 'ratio': event.ratio,
+                 'execution_date': str(event.date)
+             },
+             metadata={'source': 'nasdaq_calendar'}
+         )
+     elif event_type == "economic":
+         # EconomicEvent(date, time, country, importance, event, actual, forecast, previous)
+         # Use country as ticker for economic events
+         ticker = event.country.upper().replace(' ', '_') if event.country else "GLOBAL"
+         return DatabaseEntry(
+             date=_date.isoformat(),
+             data_type=DataType.ECONOMIC_EVENTS.value,
+             ticker=ticker,
+             data={
+                 'event_type': 'economic',
+                 'time': event.time,
+                 'importance': event.importance,
+                 'event': event.event,
+                 'actual': event.actual,
+                 'forecast': event.forecast,
+                 'previous': event.previous,
+                 'execution_date': str(event.date)
+             },
+             metadata={'source': 'nasdaq_calendar'}
+         )
+     elif event_type == "dividends":
+         # DividendEvent(date, company, ticker, dividend_rate, ...)
+         return DatabaseEntry(
+             date=_date.isoformat(),
+             data_type=DataType.DIVIDENDS.value,
+             ticker=event.ticker,
+             data={
+                 'event_type': 'dividend',
+                 'company': event.company,
+                 'dividend_rate': event.dividend_rate,
+                 'execution_date': str(event.date)
+             },
+             metadata={'source': 'nasdaq_calendar'}
+         )
+     return None
+
+ def save_events_to_db(events_dict: dict, _date: date):
+     """Save all fetched events to the database"""
+     db = LocalDatabase()
+     all_entries = []
+
+     print("Processing events for database saving...")
+
+     for event_type, events in events_dict.items():
+         if not events:
+             continue
+
+         print(f" - Processing {len(events)} {event_type} events...")
+         for event in events:
+             try:
+                 entry = _map_event_to_entry(event, event_type, _date)
+                 if entry:
+                     all_entries.append(entry)
+             except Exception as e:
+                 print(f"Error mapping {event_type} event: {e}")
+
+     if all_entries:
+         print(f"Saving {len(all_entries)} entries to database...")
+         saved_count = db.save_batch(all_entries, expiry_days=30)
+         print(f"Successfully saved {saved_count} events to database.")
+     else:
+         print("No events to save.")
+
+ if __name__ == "__main__":
+     today = date.today()
+     print(f"Fetching events for {today}...")
+     events = get_events(today)
+
+     print(f"Earnings Events: {len(events['earnings'])}")
+     print(f"IPO Events: {len(events['ipos'])}")
+     print(f"Stock Split Events: {len(events['splits'])}")
+     print(f"Economic Events: {len(events['economic'])}")
+     print(f"Dividend Events: {len(events['dividends'])}")
+
+     save_events_to_db(events, today)
src/calendar_scraper/models/__init__.py ADDED
@@ -0,0 +1,3 @@
+ # FILE: /calendar-scraper/calendar_scraper/models/__init__.py
+
+ # This file is intentionally left blank.
src/calendar_scraper/models/dividend_calendar.py ADDED
@@ -0,0 +1,17 @@
+ from dataclasses import dataclass
+ from datetime import date
+ from typing import Optional
+
+
+ @dataclass
+ class DividendEvent:
+     """Represents a dividend event"""
+     date: date
+     company: str
+     ticker: str
+     dividend_rate: str
+     annual_dividend: Optional[str] = ""
+     ex_date: Optional[str] = ""
+     record_date: Optional[str] = ""
+     payment_date: Optional[str] = ""
+     announcement_date: Optional[str] = ""
src/calendar_scraper/models/earnings_calendar.py ADDED
@@ -0,0 +1,18 @@
+ from dataclasses import dataclass
+
+ @dataclass
+ class EarningsEvent:
+     def __init__(self, date, time, company, ticker, eps, revenue, market_cap):
+         self.date = date
+         self.time = time
+         self.company = company
+         self.ticker = ticker
+         self.eps = eps
+         self.revenue = revenue
+         self.market_cap = market_cap
+
+     def __repr__(self):
+         return self.__print__()
+
+     def __print__(self):
+         return f"EarningsEvent(date={self.date}, time={self.time}, company={self.company}, ticker={self.ticker}, eps={self.eps}, revenue={self.revenue}, market_cap={self.market_cap})"
src/calendar_scraper/models/economic_calendar.py ADDED
@@ -0,0 +1,20 @@
+
+ from dataclasses import dataclass
+
+ @dataclass
+ class EconomicEvent:
+     def __init__(self, date, time, country, importance, event, actual, forecast, previous):
+         self.date = date
+         self.time = time
+         self.country = country
+         self.importance = importance
+         self.event = event
+         self.actual = actual
+         self.forecast = forecast
+         self.previous = previous
+
+     def __repr__(self):
+         return self.__print__()
+
+     def __print__(self):
+         return f"EconomicEvent(date={self.date}, time={self.time}, country={self.country}, importance={self.importance}, event={self.event}, actual={self.actual}, forecast={self.forecast}, previous={self.previous})"
src/calendar_scraper/models/ipo_calendar.py ADDED
@@ -0,0 +1,16 @@
+ from dataclasses import dataclass
+ from datetime import date
+ from typing import Optional
+
+
+ @dataclass
+ class IPOEvent:
+     """Represents an IPO event"""
+     date: date
+     company: str
+     ticker: str
+     exchange: str
+     shares: Optional[str] = ""
+     price_range: Optional[str] = ""
+     market_cap: Optional[float] = None
+     expected_to_trade: Optional[str] = ""
src/calendar_scraper/models/stock_split_calendar.py ADDED
@@ -0,0 +1,17 @@
+ from dataclasses import dataclass
+ from datetime import date as date_type
+ from typing import Optional
+
+
+ @dataclass
+ class StockSplitEvent:
+     """Represents a stock split event"""
+     date: date_type
+     company: str
+     ticker: str
+     ratio: str
+     option_symbol: Optional[str] = ""
+     announcement_date: Optional[date_type] = None
+     ex_date: Optional[date_type] = None
+     record_date: Optional[date_type] = None
+     payable_date: Optional[date_type] = None
src/db/__init__.py ADDED
@@ -0,0 +1,45 @@
+ """
+ Local Database System for Stock Alchemist
+
+ A comprehensive database solution for storing and retrieving
+ stock market data across multiple modules with JSON file storage
+ and SQLite indexing.
+ """
+
+ from .local_database import (
+     LocalDatabase,
+     DatabaseEntry,
+     DataType,
+     get_database
+ )
+
+ from .adapters import (
+     CalendarAdapter,
+     FundamentalAdapter,
+     NewsAdapter,
+     TechnicalAnalysisAdapter,
+     get_calendar_adapter,
+     get_fundamental_adapter,
+     get_news_adapter,
+     get_technical_adapter
+ )
+
+ __all__ = [
+     # Core database
+     'LocalDatabase',
+     'DatabaseEntry',
+     'DataType',
+     'get_database',
+
+     # Adapters
+     'CalendarAdapter',
+     'FundamentalAdapter',
+     'NewsAdapter',
+     'TechnicalAnalysisAdapter',
+     'get_calendar_adapter',
+     'get_fundamental_adapter',
+     'get_news_adapter',
+     'get_technical_adapter',
+ ]
+
+ __version__ = '1.0.0'
src/db/adapters.py ADDED
@@ -0,0 +1,622 @@
1
+ """
2
+ Module-specific adapters for integrating the local database
3
+ with calendar_scraper, fundamental_analysis, and news_scraper
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Dict, Any, List, Optional
8
+ from pathlib import Path
9
+ import sys
10
+
11
+ # Add src to path if needed
12
+ sys.path.append(str(Path(__file__).parent.parent))
13
+
14
+ from db.local_database import LocalDatabase, DatabaseEntry, DataType
15
+
16
+
17
+ class CalendarAdapter:
18
+ """
19
+ Adapter for calendar_scraper module
20
+ Handles earnings_events and economic_events
21
+ """
22
+
23
+ def __init__(self, db: Optional[LocalDatabase] = None):
24
+ self.db = db or LocalDatabase()
25
+
26
+ ### EARINGS ###
27
+
28
+ def save_earnings_event(self, date: str, ticker: str, event_data: Dict[str, Any],
29
+ expiry_days: int = 30) -> bool:
30
+ """
31
+ Save earnings event to database
32
+
33
+ Args:
34
+ date: Event date (YYYY-MM-DD)
35
+ ticker: Stock ticker
36
+ event_data: Event details (company, time, eps, revenue, market_cap)
37
+ expiry_days: Data expiry in days
38
+
39
+ Returns:
40
+ True if successful
41
+ """
42
+ entry = DatabaseEntry(
43
+ date=date,
44
+ data_type=DataType.EARNINGS.value,
45
+ ticker=ticker.upper(),
46
+ data={
47
+ 'event_type': 'earnings',
48
+ **event_data
49
+ },
50
+ metadata={
51
+ 'source': 'calendar_scraper',
52
+ 'scraper': 'earnings'
53
+ }
54
+ )
55
+
56
+ return self.db.save(entry, expiry_days=expiry_days)
57
+
58
+
59
+ def get_earnings_events(self, ticker: str, date_from: str = None,
60
+ date_to: str = None) -> List[DatabaseEntry]:
61
+ """Get earnings events for ticker"""
62
+ entries = self.db.query(
63
+ ticker=ticker.upper(),
64
+ data_type=DataType.EARNINGS.value,
65
+ date_from=date_from,
66
+ date_to=date_to
67
+ )
68
+
69
+ # Filter for earnings events only
70
+ return [e for e in entries if e.data.get('event_type') == 'earnings']
71
+
72
+ ### ECONOMIC EVENTS ###
73
+
74
+ def save_economic_event(self, date: str, event_data: Dict[str, Any],
75
+ expiry_days: int = 7) -> bool:
76
+ """
77
+ Save economic event to database
78
+
79
+ Args:
80
+ date: Event date (YYYY-MM-DD)
81
+ event_data: Event details (country, importance, event, actual, forecast, previous)
82
+ expiry_days: Data expiry in days
83
+
84
+ Returns:
85
+ True if successful
86
+ """
87
+ # Use country as ticker for economic events
88
+ ticker = event_data.get('country', 'GLOBAL').upper().replace(' ', '_')
89
+
90
+ entry = DatabaseEntry(
91
+ date=date,
92
+ data_type=DataType.ECONOMIC_EVENTS.value,
93
+ ticker=ticker,
94
+ data={
95
+ 'event_type': 'economic',
96
+ **event_data
97
+ },
98
+ metadata={
99
+ 'source': 'calendar_scraper',
100
+ 'scraper': 'economic'
101
+ }
102
+ )
103
+
104
+ return self.db.save(entry, expiry_days=expiry_days)
105
+
106
+
107
+ def get_economic_events(self, country: str = None, date_from: str = None,
108
+ date_to: str = None) -> List[DatabaseEntry]:
109
+ """Get economic events"""
110
+ ticker = country.upper().replace(' ', '_') if country else None
111
+
112
+ entries = self.db.query(
113
+ ticker=ticker,
114
+ data_type=DataType.ECONOMIC_EVENTS.value,
115
+ date_from=date_from,
116
+ date_to=date_to
117
+ )
118
+
119
+ # Filter for economic events only
120
+ return [e for e in entries if e.data.get('event_type') == 'economic']
121
+
122
+
123
+ ### DIVIDENDS ###
124
+ def get_dividends_events(self, ticker: str, date_from: str = None,
125
+ date_to: str = None) -> List[DatabaseEntry]:
126
+ """Get dividend events for ticker"""
127
+ entries = self.db.query(
128
+ ticker=ticker.upper(),
129
+ data_type=DataType.DIVIDENDS.value,
130
+ date_from=date_from,
131
+ date_to=date_to
132
+ )
133
+
134
+ # Filter for earnings events only
135
+ return [e for e in entries if e.data.get('event_type') == 'dividend']
136
+
137
+ ### IPOs ###
138
+ def get_ipo_events(self, ticker: str, date_from: str = None,
139
+ date_to: str = None) -> List[DatabaseEntry]:
140
+ """Get ipo events for ticker"""
141
+ entries = self.db.query(
142
+ ticker=ticker.upper(),
143
+ data_type=DataType.IPO.value,
144
+ date_from=date_from,
145
+ date_to=date_to
146
+ )
147
+
148
+ # Filter for earnings events only
149
+ return [e for e in entries if e.data.get('event_type') == 'ipo']
150
+
151
+ ## STOCK SPLITS ###
152
+ def get_stock_split_events(self, ticker: str, date_from: str = None,
153
+ date_to: str = None) -> List[DatabaseEntry]:
154
+ """Get stock split events for ticker"""
155
+ entries = self.db.query(
156
+ ticker=ticker.upper(),
157
+ data_type=DataType.STOCK_SPLIT.value,
158
+ date_from=date_from,
159
+ date_to=date_to
160
+ )
161
+
162
+ # Filter for earnings events only
163
+ return [e for e in entries if e.data.get('event_type') == 'stock_split']
164
+
165
+
166
+ class FundamentalAdapter:
167
+ """
168
+ Adapter for fundamental_analysis module
169
+ Handles financial metrics and investment decisions
170
+ """
171
+
172
+ def __init__(self, db: Optional[LocalDatabase] = None):
173
+ self.db = db or LocalDatabase()
174
+
175
+ def save_financial_metrics(self, date: str, ticker: str, metrics: Dict[str, Any],
176
+ expiry_days: int = 1) -> bool:
177
+ """
178
+ Save financial metrics to database
179
+
180
+ Args:
181
+ date: Analysis date (YYYY-MM-DD)
182
+ ticker: Stock ticker
183
+ metrics: Financial metrics from calculator.py
184
+ expiry_days: Data expiry in days (financial data changes daily)
185
+
186
+ Returns:
187
+ True if successful
188
+ """
189
+ entry = DatabaseEntry(
190
+ date=date,
191
+ data_type=DataType.FUNDAMENTAL.value,
192
+ ticker=ticker.upper(),
193
+ data={
194
+ 'analysis_type': 'metrics',
195
+ 'metrics': metrics
196
+ },
197
+ metadata={
198
+ 'source': 'fundamental_analysis',
199
+ 'module': 'calculator'
200
+ }
201
+ )
202
+
203
+ return self.db.save(entry, expiry_days=expiry_days)
204
+
205
+ def save_investment_decision(self, date: str, ticker: str, decision: Dict[str, Any],
206
+ expiry_days: int = 1) -> bool:
207
+ """
208
+ Save investment decision to database
209
+
210
+ Args:
211
+ date: Decision date (YYYY-MM-DD)
212
+ ticker: Stock ticker
213
+ decision: Investment decision from decision_maker.py
214
+ expiry_days: Data expiry in days
215
+
216
+ Returns:
217
+ True if successful
218
+ """
219
+ entry = DatabaseEntry(
220
+ date=date,
221
+ data_type=DataType.FUNDAMENTAL.value,
222
+ ticker=ticker.upper(),
223
+ data={
224
+ 'analysis_type': 'decision',
225
+ 'recommendation': decision.get('recommendation'),
226
+ 'score': decision.get('final_score'),
227
+ 'confidence': decision.get('confidence'),
228
+ 'reasoning': decision.get('reasoning'),
229
+ 'key_metrics': decision.get('key_metrics'),
230
+ 'category_scores': decision.get('category_scores')
231
+ },
232
+ metadata={
233
+ 'source': 'fundamental_analysis',
234
+ 'module': 'decision_maker'
235
+ }
236
+ )
237
+
238
+ return self.db.save(entry, expiry_days=expiry_days)
239
+
240
+ def save_sector_analysis(self, date: str, sector: str, analysis: Dict[str, Any],
241
+ expiry_days: int = 7) -> bool:
242
+ """
243
+ Save sector analysis to database
244
+
245
+ Args:
246
+ date: Analysis date (YYYY-MM-DD)
247
+ sector: Sector name (e.g., "Technology")
248
+ analysis: Sector comparison data
249
+ expiry_days: Data expiry in days
250
+
251
+ Returns:
252
+ True if successful
253
+ """
254
+ # Use sector name as ticker
255
+ ticker = f"SECTOR_{sector.upper().replace(' ', '_')}"
256
+
257
+ entry = DatabaseEntry(
258
+ date=date,
259
+ data_type=DataType.FUNDAMENTAL.value,
260
+ ticker=ticker,
261
+ data={
262
+ 'analysis_type': 'sector',
263
+ 'sector': sector,
264
+ **analysis
265
+ },
266
+ metadata={
267
+ 'source': 'fundamental_analysis',
268
+ 'module': 'sector_analyzer'
269
+ }
270
+ )
271
+
272
+ return self.db.save(entry, expiry_days=expiry_days)
273
+
274
+ def get_financial_metrics(self, ticker: str, date: str = None) -> Optional[DatabaseEntry]:
275
+ """Get latest financial metrics for ticker"""
276
+ if date:
277
+ return self.db.get(date, DataType.FUNDAMENTAL.value, ticker.upper())
278
+
279
+ # Get most recent
280
+ entries = self.db.query(
281
+ ticker=ticker.upper(),
282
+ data_type=DataType.FUNDAMENTAL.value,
283
+ limit=1
284
+ )
285
+
286
+ return entries[0] if entries else None
287
+
288
+ def get_investment_decisions(self, ticker: str, date_from: str = None,
289
+ date_to: str = None) -> List[DatabaseEntry]:
290
+ """Get investment decision history for ticker"""
291
+ entries = self.db.query(
292
+ ticker=ticker.upper(),
293
+ data_type=DataType.FUNDAMENTAL.value,
294
+ date_from=date_from,
295
+ date_to=date_to
296
+ )
297
+
298
+ # Filter for decisions only
299
+ return [e for e in entries if e.data.get('analysis_type') == 'decision']
300
+
301
+
302
+ class NewsAdapter:
303
+ """
304
+ Adapter for news_scraper module
305
+ Handles news articles and sentiment analysis
306
+ """
307
+
308
+ def __init__(self, db: Optional[LocalDatabase] = None):
309
+ self.db = db or LocalDatabase()
310
+
311
+ def save_news_article(self, date: str, ticker: str, article: Dict[str, Any],
312
+ expiry_days: int = 30) -> bool:
313
+ """
314
+ Save news article to database
315
+
316
+ Args:
317
+ date: Article date (YYYY-MM-DD)
318
+ ticker: Stock ticker
319
+ article: Article data (title, content, source, url, etc.)
320
+ expiry_days: Data expiry in days
321
+
322
+ Returns:
323
+ True if successful
324
+ """
325
+ entry = DatabaseEntry(
326
+ date=date,
327
+ data_type=DataType.NEWS.value,
328
+ ticker=ticker.upper(),
329
+ data={
330
+ 'content_type': 'article',
331
+ **article
332
+ },
333
+ metadata={
334
+ 'source': 'news_scraper',
335
+ 'scraper': article.get('source', 'unknown')
336
+ }
337
+ )
338
+
339
+ return self.db.save(entry, expiry_days=expiry_days)
340
+
341
+ def save_sentiment_analysis(self, date: str, ticker: str, sentiment: Dict[str, Any],
342
+ expiry_days: int = 7) -> bool:
343
+ """
344
+ Save sentiment analysis to database
345
+
346
+ Args:
347
+ date: Analysis date (YYYY-MM-DD)
348
+ ticker: Stock ticker
349
+ sentiment: Sentiment analysis results
350
+ expiry_days: Data expiry in days
351
+
352
+ Returns:
353
+ True if successful
354
+ """
355
+ entry = DatabaseEntry(
356
+ date=date,
357
+ data_type=DataType.NEWS.value,
358
+ ticker=ticker.upper(),
359
+ data={
360
+ 'content_type': 'sentiment',
361
+ **sentiment
362
+ },
363
+ metadata={
364
+ 'source': 'news_scraper',
365
+ 'module': 'sentiment_analysis'
366
+ }
367
+ )
368
+
369
+ return self.db.save(entry, expiry_days=expiry_days)
370
+
371
+ def get_news_articles(self, ticker: str, date_from: str = None,
372
+ date_to: str = None) -> List[DatabaseEntry]:
373
+ """Get news articles for ticker"""
374
+ entries = self.db.query(
375
+ ticker=ticker.upper(),
376
+ data_type=DataType.NEWS.value,
377
+ date_from=date_from,
378
+ date_to=date_to
379
+ )
380
+
381
+ # Filter for articles only
382
+ return [e for e in entries if e.data.get('content_type') == 'article']
383
+
384
+ def get_sentiment_history(self, ticker: str, date_from: str = None,
385
+ date_to: str = None) -> List[DatabaseEntry]:
386
+ """Get sentiment analysis history for ticker"""
387
+ entries = self.db.query(
388
+ ticker=ticker.upper(),
389
+ data_type=DataType.NEWS.value,
390
+ date_from=date_from,
391
+ date_to=date_to
392
+ )
393
+
394
+ # Filter for sentiment only
395
+ return [e for e in entries if e.data.get('content_type') == 'sentiment']
396
+
397
+
398
+ class TechnicalAnalysisAdapter:
399
+ """
400
+ Adapter for technical analysis data
401
+ Can be used for price data, indicators, signals
402
+ """
403
+
404
+ def __init__(self, db: Optional[LocalDatabase] = None):
405
+ self.db = db or LocalDatabase()
406
+
407
+ def save_technical_indicators(self, date: str, ticker: str, indicators: Dict[str, Any],
408
+ expiry_days: int = 1) -> bool:
409
+ """
410
+ Save technical indicators to database
411
+
412
+ Args:
413
+ date: Analysis date (YYYY-MM-DD)
414
+ ticker: Stock ticker
415
+ indicators: Technical indicators (RSI, MACD, etc.)
416
+ expiry_days: Data expiry in days
417
+
418
+ Returns:
419
+ True if successful
420
+ """
421
+ entry = DatabaseEntry(
422
+ date=date,
423
+ data_type=DataType.TECHNICAL_ANALYSIS.value,
424
+ ticker=ticker.upper(),
425
+ data={
426
+ 'analysis_type': 'indicators',
427
+ **indicators
428
+ },
429
+ metadata={
430
+ 'source': 'technical_analysis'
431
+ }
432
+ )
433
+
434
+ return self.db.save(entry, expiry_days=expiry_days)
435
+
436
+ def save_trading_signal(self, date: str, ticker: str, signal: Dict[str, Any],
437
+ expiry_days: int = 1) -> bool:
438
+ """
439
+ Save trading signal to database
440
+
441
+ Args:
442
+ date: Signal date (YYYY-MM-DD)
443
+ ticker: Stock ticker
444
+ signal: Trading signal data
445
+ expiry_days: Data expiry in days
446
+
447
+ Returns:
448
+ True if successful
449
+ """
450
+ entry = DatabaseEntry(
451
+ date=date,
452
+ data_type=DataType.TECHNICAL_ANALYSIS.value,
453
+ ticker=ticker.upper(),
454
+ data={
455
+ 'analysis_type': 'signal',
456
+ **signal
457
+ },
458
+ metadata={
459
+ 'source': 'technical_analysis'
460
+ }
461
+ )
462
+
463
+ return self.db.save(entry, expiry_days=expiry_days)
464
+
465
+ def get_technical_indicators(self, ticker: str, date_from: str = None,
466
+ date_to: str = None) -> List[DatabaseEntry]:
467
+ """Get technical indicators for ticker"""
468
+ entries = self.db.query(
469
+ ticker=ticker.upper(),
470
+ data_type=DataType.TECHNICAL_ANALYSIS.value,
471
+ date_from=date_from,
472
+ date_to=date_to
473
+ )
474
+
475
+ # Filter for indicators only
476
+ return [e for e in entries if e.data.get('analysis_type') == 'indicators']
477
+
478
+ def get_trading_signals(self, ticker: str, date_from: str = None,
479
+ date_to: str = None) -> List[DatabaseEntry]:
480
+ """Get trading signals for ticker"""
481
+ entries = self.db.query(
482
+ ticker=ticker.upper(),
483
+ data_type=DataType.TECHNICAL_ANALYSIS.value,
484
+ date_from=date_from,
485
+ date_to=date_to
486
+ )
487
+
488
+ # Filter for signals only
489
+ return [e for e in entries if e.data.get('analysis_type') == 'signal']
490
+
491
+
492
+ # Additional methods for CalendarAdapter
493
+ def _add_calendar_methods():
494
+ """Add missing methods to CalendarAdapter"""
495
+
496
+ def save_ipo_event(self, date: str, ticker: str, event_data: Dict[str, Any],
497
+ expiry_days: int = 90) -> bool:
498
+ """Save IPO event to database"""
499
+ entry = DatabaseEntry(
500
+ date=date,
501
+ data_type=DataType.IPO.value,
502
+ ticker=ticker.upper(),
503
+ data={
504
+ 'event_type': 'ipo',
505
+ **event_data
506
+ },
507
+ metadata={
508
+ 'source': 'calendar_scraper',
509
+ 'scraper': 'ipo'
510
+ }
511
+ )
512
+ return self.db.save(entry, expiry_days=expiry_days)
513
+
514
+ def save_stock_split_event(self, date: str, ticker: str, event_data: Dict[str, Any],
515
+ expiry_days: int = 90) -> bool:
516
+ """Save stock split event to database"""
517
+ entry = DatabaseEntry(
518
+ date=date,
519
+ data_type=DataType.STOCK_SPLIT.value,
520
+ ticker=ticker.upper(),
521
+ data={
522
+ 'event_type': 'stock_split',
523
+ **event_data
524
+ },
525
+ metadata={
526
+ 'source': 'calendar_scraper',
527
+ 'scraper': 'stock_split'
528
+ }
529
+ )
530
+ return self.db.save(entry, expiry_days=expiry_days)
531
+
532
+ def save_dividend_event(self, date: str, ticker: str, event_data: Dict[str, Any],
533
+ expiry_days: int = 90) -> bool:
534
+ """Save dividend event to database"""
535
+ entry = DatabaseEntry(
536
+ date=date,
537
+ data_type=DataType.DIVIDENDS.value,
538
+ ticker=ticker.upper(),
539
+ data={
540
+ 'event_type': 'dividend',
541
+ **event_data
542
+ },
543
+ metadata={
544
+ 'source': 'calendar_scraper',
545
+ 'scraper': 'dividend'
546
+ }
547
+ )
548
+ return self.db.save(entry, expiry_days=expiry_days)
549
+
550
+ # Add methods to CalendarAdapter class
551
+ CalendarAdapter.save_ipo_event = save_ipo_event
552
+ CalendarAdapter.save_stock_split_event = save_stock_split_event
553
+ CalendarAdapter.save_dividend_event = save_dividend_event
554
+
555
+ _add_calendar_methods()
556
+
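Once `_add_calendar_methods()` has executed at import time, the patched functions behave like ordinary bound methods. A minimal sketch (the ticker and event fields below are hypothetical):

adapter = CalendarAdapter()
# ex_date feeds into the entry's unique key (see DatabaseEntry.generate_key)
adapter.save_dividend_event(
    date="2025-11-15",
    ticker="AAPL",
    event_data={"ex_date": "2025-11-20", "amount": 0.24},
    expiry_days=90,
)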
557
+
558
+ # Additional method for FundamentalAdapter
559
+ def _add_fundamental_methods():
560
+ """Add missing methods to FundamentalAdapter"""
561
+
562
+ def save_fundamental_analysis(self, date: str, ticker: str, analysis_data: Dict[str, Any],
563
+ expiry_days: int = 30) -> bool:
564
+ """
565
+ Save complete fundamental analysis to database
566
+ Includes last_processed_datetime for tracking
567
+ """
568
+ entry = DatabaseEntry(
569
+ date=date,
570
+ data_type=DataType.FUNDAMENTAL.value,
571
+ ticker=ticker.upper(),
572
+ data={
573
+ 'analysis_type': 'complete',
574
+ 'last_processed_datetime': datetime.now().isoformat(),
575
+ **analysis_data
576
+ },
577
+ metadata={
578
+ 'source': 'fundamental_analysis',
579
+ 'module': 'complete_analysis'
580
+ }
581
+ )
582
+ return self.db.save(entry, expiry_days=expiry_days)
583
+
584
+ def get_fundamental_analysis(self, ticker: str, date_from: str = None,
585
+ date_to: str = None) -> List[DatabaseEntry]:
586
+ """Get fundamental analysis for ticker"""
587
+ entries = self.db.query(
588
+ ticker=ticker.upper(),
589
+ data_type=DataType.FUNDAMENTAL.value,
590
+ date_from=date_from,
591
+ date_to=date_to
592
+ )
593
+
594
+ # Filter for complete analysis
595
+ return [e for e in entries if e.data.get('analysis_type') == 'complete']
596
+
597
+ # Add methods to FundamentalAdapter class
598
+ FundamentalAdapter.save_fundamental_analysis = save_fundamental_analysis
599
+ FundamentalAdapter.get_fundamental_analysis = get_fundamental_analysis
600
+
601
+ _add_fundamental_methods()
602
+
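Likewise, after `_add_fundamental_methods()` runs, `FundamentalAdapter` gains a complete-analysis save that stamps `last_processed_datetime` into the payload at save time, so downstream jobs can tell how fresh an analysis is. A short sketch (metric values are made up):

adapter = FundamentalAdapter()
adapter.save_fundamental_analysis(
    date="2025-11-15",
    ticker="GOOGL",
    analysis_data={"pe_ratio": 28.5, "roic": 0.32},
    expiry_days=30,
)
latest = adapter.get_fundamental_analysis("GOOGL")  # complete analyses only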
603
+
604
+ # Convenience functions for quick access
605
+ def get_calendar_adapter(db: Optional[LocalDatabase] = None) -> CalendarAdapter:
606
+ """Get calendar adapter instance"""
607
+ return CalendarAdapter(db)
608
+
609
+
610
+ def get_fundamental_adapter(db: Optional[LocalDatabase] = None) -> FundamentalAdapter:
611
+ """Get fundamental analysis adapter instance"""
612
+ return FundamentalAdapter(db)
613
+
614
+
615
+ def get_news_adapter(db: Optional[LocalDatabase] = None) -> NewsAdapter:
616
+ """Get news adapter instance"""
617
+ return NewsAdapter(db)
618
+
619
+
620
+ def get_technical_adapter(db: Optional[LocalDatabase] = None) -> TechnicalAnalysisAdapter:
621
+ """Get technical analysis adapter instance"""
622
+ return TechnicalAnalysisAdapter(db)
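The convenience factories are thin wrappers; their main value is letting callers share a single `LocalDatabase` connection across adapters rather than each adapter constructing its own. A sketch of that pattern:

db = LocalDatabase()
news = get_news_adapter(db)
technicals = get_technical_adapter(db)  # both adapters reuse the same db instance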
src/db/db_util.py ADDED
@@ -0,0 +1,352 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Database management utility for Stock Alchemist
4
+ Similar to cache_util.py but for the local database system
5
+ """
6
+
7
+ import argparse
8
+ import sys
9
+ from pathlib import Path
10
+ from datetime import datetime, timedelta
11
+ import json
12
+
13
+ # Add src to path
14
+ sys.path.append(str(Path(__file__).parent.parent))
15
+
16
+ from db.local_database import LocalDatabase, DataType
17
+
18
+
19
+ def show_stats(db_dir: str):
20
+ """Show database statistics"""
21
+ db = LocalDatabase(db_dir=db_dir)
22
+ stats = db.get_stats()
23
+
24
+ print("\n" + "="*60)
25
+ print("DATABASE STATISTICS")
26
+ print("="*60)
27
+
28
+ print(f"\n📊 Overview:")
29
+ print(f" Total Entries: {stats.get('total_entries', 0):,}")
30
+ print(f" Total Size: {stats.get('total_size_mb', 0)} MB ({stats.get('total_size_bytes', 0):,} bytes)")
31
+ print(f" Compression: {stats.get('compression', 'disabled')}")
32
+ print(f" Expired Entries: {stats.get('expired_entries', 0)}")
33
+
34
+ date_range = stats.get('date_range')
35
+ if date_range:
36
+ print(f" Date Range: {date_range['from']} to {date_range['to']}")
37
+
38
+ by_type = stats.get('by_type', {})
39
+ if by_type:
40
+ print(f"\n📁 By Data Type:")
41
+ for data_type, count in sorted(by_type.items()):
42
+ print(f" {data_type:.<30} {count:>6,}")
43
+
44
+ top_tickers = stats.get('top_tickers', {})
45
+ if top_tickers:
46
+ print(f"\n🏢 Top 10 Tickers:")
47
+ for ticker, count in list(top_tickers.items())[:10]:
48
+ print(f" {ticker:.<20} {count:>6,}")
49
+
50
+ print("\n" + "="*60 + "\n")
51
+
52
+
53
+ def clean_expired(db_dir: str):
54
+ """Clean expired entries"""
55
+ db = LocalDatabase(db_dir=db_dir)
56
+ count = db.clean_expired()
57
+ print(f"✓ Cleaned {count} expired entries")
58
+
59
+
60
+ def clear_all(db_dir: str, confirm: bool = False):
61
+ """Clear all database entries"""
62
+ if not confirm:
63
+ response = input("⚠️ This will delete ALL data. Are you sure? (yes/no): ")
64
+ if response.lower() != 'yes':
65
+ print("❌ Cancelled")
66
+ return
67
+
68
+ db = LocalDatabase(db_dir=db_dir)
69
+ db.clear_all()
70
+
71
+
72
+ def clear_by_type(db_dir: str, data_type: str):
73
+ """Clear entries of specific type"""
74
+ db = LocalDatabase(db_dir=db_dir)
75
+
76
+ # Query all entries of this type
77
+ entries = db.query(data_type=data_type)
78
+
79
+ if not entries:
80
+ print(f"No entries found for type: {data_type}")
81
+ return
82
+
83
+ print(f"Found {len(entries)} entries of type '{data_type}'")
84
+ response = input("Delete these entries? (yes/no): ")
85
+
86
+ if response.lower() != 'yes':
87
+ print("❌ Cancelled")
88
+ return
89
+
90
+ # Delete each entry
91
+ deleted = 0
92
+ for entry in entries:
93
+ if db.delete(entry.date, entry.data_type, entry.ticker):
94
+ deleted += 1
95
+
96
+ print(f"✓ Deleted {deleted} entries")
97
+
98
+
99
+ def clear_by_ticker(db_dir: str, ticker: str):
100
+ """Clear entries for specific ticker"""
101
+ db = LocalDatabase(db_dir=db_dir)
102
+
103
+ # Query all entries for this ticker
104
+ entries = db.query(ticker=ticker)
105
+
106
+ if not entries:
107
+ print(f"No entries found for ticker: {ticker}")
108
+ return
109
+
110
+ print(f"Found {len(entries)} entries for ticker '{ticker}'")
111
+ response = input("Delete these entries? (yes/no): ")
112
+
113
+ if response.lower() != 'yes':
114
+ print("❌ Cancelled")
115
+ return
116
+
117
+ # Delete each entry
118
+ deleted = 0
119
+ for entry in entries:
120
+ if db.delete(entry.date, entry.data_type, entry.ticker):
121
+ deleted += 1
122
+
123
+ print(f"✓ Deleted {deleted} entries")
124
+
125
+
126
+ def clear_older_than(db_dir: str, days: int):
127
+ """Clear entries older than specified days"""
128
+ db = LocalDatabase(db_dir=db_dir)
129
+ cutoff_date = (datetime.now() - timedelta(days=days)).date().isoformat()
130
+
131
+ # Query old entries
132
+ entries = db.query(date_to=cutoff_date)
133
+
134
+ if not entries:
135
+ print(f"No entries older than {days} days")
136
+ return
137
+
138
+ print(f"Found {len(entries)} entries older than {days} days")
139
+ response = input("Delete these entries? (yes/no): ")
140
+
141
+ if response.lower() != 'yes':
142
+ print("❌ Cancelled")
143
+ return
144
+
145
+ # Delete each entry
146
+ deleted = 0
147
+ for entry in entries:
148
+ if db.delete(entry.date, entry.data_type, entry.ticker):
149
+ deleted += 1
150
+
151
+ print(f"✓ Deleted {deleted} entries")
152
+
153
+
154
+ def search(db_dir: str, date_from: str = None, date_to: str = None,
155
+ data_type: str = None, ticker: str = None, limit: int = 10):
156
+ """Search database entries"""
157
+ db = LocalDatabase(db_dir=db_dir)
158
+
159
+ entries = db.query(
160
+ date_from=date_from,
161
+ date_to=date_to,
162
+ data_type=data_type,
163
+ ticker=ticker,
164
+ limit=limit
165
+ )
166
+
167
+ if not entries:
168
+ print("No entries found")
169
+ return
170
+
171
+ print(f"\n📋 Found {len(entries)} entries:\n")
172
+ print(f"{'Date':<12} {'Type':<20} {'Ticker':<10} {'Created':<20}")
173
+ print("-" * 70)
174
+
175
+ for entry in entries:
176
+ created = datetime.fromisoformat(entry.created_at).strftime("%Y-%m-%d %H:%M")
177
+ print(f"{entry.date:<12} {entry.data_type:<20} {entry.ticker:<10} {created:<20}")
178
+
179
+ print()
180
+
181
+
182
+ def export_data(db_dir: str, output_file: str, date_from: str = None,
183
+ date_to: str = None, data_type: str = None, ticker: str = None):
184
+ """Export database entries to JSON file"""
185
+ db = LocalDatabase(db_dir=db_dir)
186
+
187
+ entries = db.query(
188
+ date_from=date_from,
189
+ date_to=date_to,
190
+ data_type=data_type,
191
+ ticker=ticker
192
+ )
193
+
194
+ if not entries:
195
+ print("No entries to export")
196
+ return
197
+
198
+ # Convert to dict list
199
+ export_data = [entry.to_dict() for entry in entries]
200
+
201
+ # Write to file
202
+ with open(output_file, 'w', encoding='utf-8') as f:
203
+ json.dump(export_data, f, indent=2, default=str)
204
+
205
+ print(f"✓ Exported {len(entries)} entries to {output_file}")
206
+
207
+
208
+ def import_data(db_dir: str, input_file: str, expiry_days: int = None):
209
+ """Import database entries from JSON file"""
210
+ db = LocalDatabase(db_dir=db_dir)
211
+
212
+ with open(input_file, 'r', encoding='utf-8') as f:
213
+ data = json.load(f)
214
+
215
+ if not isinstance(data, list):
216
+ print("❌ Invalid format: expected list of entries")
217
+ return
218
+
219
+ from db.local_database import DatabaseEntry
220
+
221
+ entries = []
222
+ for item in data:
223
+ try:
224
+ entry = DatabaseEntry.from_dict(item)
225
+ entries.append(entry)
226
+ except Exception as e:
227
+ print(f"⚠️ Skipping invalid entry: {e}")
228
+
229
+ if not entries:
230
+ print("No valid entries to import")
231
+ return
232
+
233
+ print(f"Found {len(entries)} valid entries")
234
+ response = input("Import these entries? (yes/no): ")
235
+
236
+ if response.lower() != 'yes':
237
+ print("❌ Cancelled")
238
+ return
239
+
240
+ count = db.save_batch(entries, expiry_days=expiry_days)
241
+ print(f"✓ Imported {count} entries")
242
+
243
+
244
+ def list_types():
245
+ """List available data types"""
246
+ print("\n📁 Available Data Types:\n")
247
+ for dt in DataType:
248
+ print(f" - {dt.value}")
249
+ print()
250
+
251
+
252
+ def main():
253
+ parser = argparse.ArgumentParser(
254
+ description="Database management utility for Stock Alchemist",
255
+ formatter_class=argparse.RawDescriptionHelpFormatter,
256
+ epilog="""
257
+ Examples:
258
+ # Show statistics
259
+ python db_util.py --stats
260
+
261
+ # Clean expired entries
262
+ python db_util.py --clean-expired
263
+
264
+ # Search for entries
265
+ python db_util.py --search --ticker AAPL --limit 20
266
+
267
+ # Clear entries by type
268
+ python db_util.py --clear-type fundamental_analysis
269
+
270
+ # Clear entries older than 30 days
271
+ python db_util.py --clear-older-than 30
272
+
273
+ # Export data
274
+ python db_util.py --export backup.json --date-from 2025-01-01
275
+
276
+ # Import data
277
+ python db_util.py --import backup.json --expiry-days 30
278
+ """
279
+ )
280
+
281
+ parser.add_argument("--db-dir", type=str, default="database",
282
+ help="Database directory (default: database)")
283
+
284
+ # Actions
285
+ parser.add_argument("--stats", action="store_true",
286
+ help="Show database statistics")
287
+ parser.add_argument("--clean-expired", action="store_true",
288
+ help="Clean expired entries")
289
+ parser.add_argument("--clear-all", action="store_true",
290
+ help="Clear all entries (WARNING: destructive)")
291
+ parser.add_argument("--clear-type", type=str,
292
+ help="Clear entries of specific type")
293
+ parser.add_argument("--clear-ticker", type=str,
294
+ help="Clear entries for specific ticker")
295
+ parser.add_argument("--clear-older-than", type=int,
296
+ help="Clear entries older than N days")
297
+ parser.add_argument("--search", action="store_true",
298
+ help="Search database entries")
299
+ parser.add_argument("--export", type=str,
300
+ help="Export entries to JSON file")
301
+ parser.add_argument("--import", type=str, dest="import_file",
302
+ help="Import entries from JSON file")
303
+ parser.add_argument("--list-types", action="store_true",
304
+ help="List available data types")
305
+
306
+ # Query filters
307
+ parser.add_argument("--date-from", type=str,
308
+ help="Start date (YYYY-MM-DD)")
309
+ parser.add_argument("--date-to", type=str,
310
+ help="End date (YYYY-MM-DD)")
311
+ parser.add_argument("--data-type", type=str,
312
+ help="Filter by data type")
313
+ parser.add_argument("--ticker", type=str,
314
+ help="Filter by ticker")
315
+ parser.add_argument("--limit", type=int, default=10,
316
+ help="Limit number of results (default: 10)")
317
+ parser.add_argument("--expiry-days", type=int,
318
+ help="Set expiry days for imported data")
319
+ parser.add_argument("--yes", action="store_true",
320
+ help="Skip confirmation prompts")
321
+
322
+ args = parser.parse_args()
323
+
324
+ # Execute actions
325
+ if args.stats:
326
+ show_stats(args.db_dir)
327
+ elif args.clean_expired:
328
+ clean_expired(args.db_dir)
329
+ elif args.clear_all:
330
+ clear_all(args.db_dir, confirm=args.yes)
331
+ elif args.clear_type:
332
+ clear_by_type(args.db_dir, args.clear_type)
333
+ elif args.clear_ticker:
334
+ clear_by_ticker(args.db_dir, args.clear_ticker)
335
+ elif args.clear_older_than:
336
+ clear_older_than(args.db_dir, args.clear_older_than)
337
+ elif args.search:
338
+ search(args.db_dir, args.date_from, args.date_to,
339
+ args.data_type, args.ticker, args.limit)
340
+ elif args.export:
341
+ export_data(args.db_dir, args.export, args.date_from,
342
+ args.date_to, args.data_type, args.ticker)
343
+ elif args.import_file:
344
+ import_data(args.db_dir, args.import_file, args.expiry_days)
345
+ elif args.list_types:
346
+ list_types()
347
+ else:
348
+ parser.print_help()
349
+
350
+
351
+ if __name__ == "__main__":
352
+ main()
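The same helpers can also be driven programmatically, e.g. from a scheduled backup job; a minimal sketch (assumes `src` is on sys.path, and the output file name is arbitrary):

from db.db_util import clean_expired, export_data

clean_expired("database")
export_data("database", "backup.json", date_from="2025-01-01")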
src/db/examples.py ADDED
@@ -0,0 +1,453 @@
1
+ """
2
+ Example usage of the Local Database system
3
+ Demonstrates integration with all modules
4
+ """
5
+
6
+ from datetime import datetime, timedelta
7
+ from db.local_database import LocalDatabase, DatabaseEntry, DataType
8
+ from db.adapters import (
9
+ CalendarAdapter,
10
+ FundamentalAdapter,
11
+ NewsAdapter,
12
+ TechnicalAnalysisAdapter
13
+ )
14
+
15
+
16
+ def example_calendar_scraper():
17
+ """Example: Save and retrieve calendar events"""
18
+ print("\n" + "="*60)
19
+ print("EXAMPLE 1: Calendar Scraper Integration")
20
+ print("="*60)
21
+
22
+ adapter = CalendarAdapter()
23
+ today = datetime.now().date().isoformat()
24
+
25
+ # Save earnings event
26
+ print("\n📅 Saving earnings event for AAPL...")
27
+ earnings_data = {
28
+ 'company': 'Apple Inc.',
29
+ 'time': 'After Market Close',
30
+ 'eps_forecast': 1.54,
31
+ 'last_year_eps': 1.46,
32
+ 'revenue': '89.5B',
33
+ 'market_cap': '2.8T'
34
+ }
35
+
36
+ success = adapter.save_earnings_event(
37
+ date=today,
38
+ ticker='AAPL',
39
+ event_data=earnings_data,
40
+ expiry_days=30
41
+ )
42
+ print(f"✓ Saved: {success}")
43
+
44
+ # Save economic event
45
+ print("\n🌍 Saving economic event...")
46
+ economic_data = {
47
+ 'country': 'United States',
48
+ 'importance': 'high',
49
+ 'event': 'Non-Farm Payrolls',
50
+ 'actual': 199000,
51
+ 'forecast': 180000,
52
+ 'previous': 150000
53
+ }
54
+
55
+ success = adapter.save_economic_event(
56
+ date=today,
57
+ event_data=economic_data,
58
+ expiry_days=7
59
+ )
60
+ print(f"✓ Saved: {success}")
61
+
62
+ # Retrieve earnings events
63
+ print("\n📋 Retrieving AAPL earnings events...")
64
+ events = adapter.get_earnings_events('AAPL')
65
+ print(f"Found {len(events)} events")
66
+
67
+ for event in events:
68
+ print(f" Date: {event.date}, Company: {event.data.get('company')}")
69
+
70
+
71
+ def example_fundamental_analysis():
72
+ """Example: Save and retrieve fundamental analysis"""
73
+ print("\n" + "="*60)
74
+ print("EXAMPLE 2: Fundamental Analysis Integration")
75
+ print("="*60)
76
+
77
+ adapter = FundamentalAdapter()
78
+ today = datetime.now().date().isoformat()
79
+
80
+ # Save financial metrics
81
+ print("\n📊 Saving financial metrics for GOOGL...")
82
+ metrics = {
83
+ 'market_cap': 1850000000000,
84
+ 'pe_ratio': 28.5,
85
+ 'fcf_yield': 4.08,
86
+ 'roic': 0.32,
87
+ 'revenue_growth': 0.208,
88
+ 'eps_growth': 0.25,
89
+ 'net_margin': 0.23,
90
+ 'roe': 0.28
91
+ }
92
+
93
+ success = adapter.save_financial_metrics(
94
+ date=today,
95
+ ticker='GOOGL',
96
+ metrics=metrics,
97
+ expiry_days=1
98
+ )
99
+ print(f"✓ Saved: {success}")
100
+
101
+ # Save investment decision
102
+ print("\n🎯 Saving investment decision for GOOGL...")
103
+ decision = {
104
+ 'recommendation': 'BUY',
105
+ 'final_score': 0.67,
106
+ 'confidence': 1.0,
107
+ 'reasoning': [
108
+ 'Strong FCF yield of 4.08%',
109
+ 'Excellent ROIC of 32%',
110
+ 'High revenue growth of 20.8%'
111
+ ],
112
+ 'key_metrics': {
113
+ 'fcf_yield': 4.08,
114
+ 'roic': 32.0,
115
+ 'revenue_growth': 20.8
116
+ },
117
+ 'category_scores': {
118
+ 'fcf_yield': 0.85,
119
+ 'roic': 0.90,
120
+ 'growth': 0.80
121
+ }
122
+ }
123
+
124
+ success = adapter.save_investment_decision(
125
+ date=today,
126
+ ticker='GOOGL',
127
+ decision=decision,
128
+ expiry_days=1
129
+ )
130
+ print(f"✓ Saved: {success}")
131
+
132
+ # Retrieve latest metrics
133
+ print("\n📈 Retrieving latest metrics for GOOGL...")
134
+ entry = adapter.get_financial_metrics('GOOGL')
135
+ if entry:
136
+ print(f" Date: {entry.date}")
137
+ # Check if this is a metrics entry or decision entry
138
+ if 'metrics' in entry.data:
139
+ metrics_data = entry.data['metrics']
140
+ print(f" P/E Ratio: {metrics_data.get('pe_ratio')}")
141
+ print(f" FCF Yield: {metrics_data.get('fcf_yield')}%")
142
+ elif entry.data.get('analysis_type') == 'decision':
143
+ print(f" Recommendation: {entry.data.get('recommendation')}")
144
+ print(f" Score: {entry.data.get('score')}")
145
+ else:
146
+ print(f" Analysis Type: {entry.data.get('analysis_type', 'unknown')}")
147
+
148
+
149
+ def example_news_scraper():
150
+ """Example: Save and retrieve news articles"""
151
+ print("\n" + "="*60)
152
+ print("EXAMPLE 3: News Scraper Integration")
153
+ print("="*60)
154
+
155
+ adapter = NewsAdapter()
156
+ today = datetime.now().date().isoformat()
157
+
158
+ # Save news article
159
+ print("\n📰 Saving news article for TSLA...")
160
+ article = {
161
+ 'title': 'Tesla Announces Record Q4 Deliveries',
162
+ 'content': 'Tesla reported record vehicle deliveries...',
163
+ 'source': 'Bloomberg',
164
+ 'url': 'https://example.com/article',
165
+ 'author': 'John Doe'
166
+ }
167
+
168
+ success = adapter.save_news_article(
169
+ date=today,
170
+ ticker='TSLA',
171
+ article=article,
172
+ expiry_days=30
173
+ )
174
+ print(f"✓ Saved: {success}")
175
+
176
+ # Save sentiment analysis
177
+ print("\n😊 Saving sentiment analysis for TSLA...")
178
+ sentiment = {
179
+ 'model': 'finbert-tone',
180
+ 'sentiment': 'positive',
181
+ 'score': 0.85,
182
+ 'confidence': 0.92,
183
+ 'breakdown': {
184
+ 'positive': 0.85,
185
+ 'neutral': 0.10,
186
+ 'negative': 0.05
187
+ }
188
+ }
189
+
190
+ success = adapter.save_sentiment_analysis(
191
+ date=today,
192
+ ticker='TSLA',
193
+ sentiment=sentiment,
194
+ expiry_days=7
195
+ )
196
+ print(f"✓ Saved: {success}")
197
+
198
+ # Retrieve news articles
199
+ print("\n📋 Retrieving TSLA news articles...")
200
+ articles = adapter.get_news_articles('TSLA')
201
+ print(f"Found {len(articles)} articles")
202
+
203
+ for article in articles:
204
+ print(f" Title: {article.data.get('title')}")
205
+ print(f" Source: {article.data.get('source')}")
206
+
207
+
208
+ def example_technical_analysis():
209
+ """Example: Save and retrieve technical analysis"""
210
+ print("\n" + "="*60)
211
+ print("EXAMPLE 4: Technical Analysis Integration")
212
+ print("="*60)
213
+
214
+ adapter = TechnicalAnalysisAdapter()
215
+ today = datetime.now().date().isoformat()
216
+
217
+ # Save technical indicators
218
+ print("\n📉 Saving technical indicators for NVDA...")
219
+ indicators = {
220
+ 'rsi': 65.5,
221
+ 'macd': 12.3,
222
+ 'macd_signal': 10.8,
223
+ 'sma_50': 450.25,
224
+ 'sma_200': 420.80,
225
+ 'bollinger_upper': 480.00,
226
+ 'bollinger_lower': 430.00,
227
+ 'volume': 45000000
228
+ }
229
+
230
+ success = adapter.save_technical_indicators(
231
+ date=today,
232
+ ticker='NVDA',
233
+ indicators=indicators,
234
+ expiry_days=1
235
+ )
236
+ print(f"✓ Saved: {success}")
237
+
238
+ # Save trading signal
239
+ print("\n🚦 Saving trading signal for NVDA...")
240
+ signal = {
241
+ 'signal_type': 'BUY',
242
+ 'strength': 0.75,
243
+ 'triggers': ['RSI oversold', 'MACD crossover'],
244
+ 'entry_price': 455.00,
245
+ 'stop_loss': 440.00,
246
+ 'take_profit': 480.00
247
+ }
248
+
249
+ success = adapter.save_trading_signal(
250
+ date=today,
251
+ ticker='NVDA',
252
+ signal=signal,
253
+ expiry_days=1
254
+ )
255
+ print(f"✓ Saved: {success}")
256
+
257
+ # Retrieve technical indicators
258
+ print("\n📊 Retrieving NVDA technical indicators...")
259
+ indicators_list = adapter.get_technical_indicators('NVDA')
260
+ print(f"Found {len(indicators_list)} indicator sets")
261
+
262
+ for ind in indicators_list:
263
+ print(f" RSI: {ind.data.get('rsi')}")
264
+ print(f" MACD: {ind.data.get('macd')}")
265
+
266
+
267
+ def example_batch_operations():
268
+ """Example: Batch save operations"""
269
+ print("\n" + "="*60)
270
+ print("EXAMPLE 5: Batch Operations")
271
+ print("="*60)
272
+
273
+ db = LocalDatabase()
274
+ today = datetime.now().date().isoformat()
275
+
276
+ # Create multiple entries
277
+ print("\n💾 Batch saving multiple stock analyses...")
278
+ entries = []
279
+
280
+ tickers = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'NVDA']
281
+
282
+ for ticker in tickers:
283
+ entry = DatabaseEntry(
284
+ date=today,
285
+ data_type=DataType.FUNDAMENTAL.value, # the DataType enum defines no FINANCIAL_INFO member
286
+ ticker=ticker,
287
+ data={
288
+ 'analysis_type': 'quick_scan',
289
+ 'price': 100.0 + len(ticker), # Dummy data
290
+ 'volume': 1000000 * len(ticker)
291
+ },
292
+ metadata={'batch_id': 'scan_001'}
293
+ )
294
+ entries.append(entry)
295
+
296
+ count = db.save_batch(entries, expiry_days=1)
297
+ print(f"✓ Saved {count}/{len(entries)} entries")
298
+
299
+
300
+ def example_query_operations():
301
+ """Example: Advanced queries"""
302
+ print("\n" + "="*60)
303
+ print("EXAMPLE 6: Query Operations")
304
+ print("="*60)
305
+
306
+ db = LocalDatabase()
307
+
308
+ # Query by date range
309
+ print("\n🔍 Querying all financial data from last 7 days...")
310
+ date_from = (datetime.now() - timedelta(days=7)).date().isoformat()
311
+
312
+ entries = db.query(
313
+ data_type=DataType.FUNDAMENTAL.value,
314
+ date_from=date_from,
315
+ limit=10
316
+ )
317
+
318
+ print(f"Found {len(entries)} entries:")
319
+ for entry in entries[:5]: # Show first 5
320
+ print(f" {entry.date} - {entry.ticker} - {entry.data.get('analysis_type', 'N/A')}")
321
+
322
+ # Query specific ticker
323
+ print("\n🔍 Querying all data for AAPL...")
324
+ aapl_entries = db.query(ticker='AAPL', limit=5)
325
+ print(f"Found {len(aapl_entries)} AAPL entries")
326
+
327
+
328
+ def example_database_stats():
329
+ """Example: Database statistics"""
330
+ print("\n" + "="*60)
331
+ print("EXAMPLE 7: Database Statistics")
332
+ print("="*60)
333
+
334
+ db = LocalDatabase()
335
+ stats = db.get_stats()
336
+
337
+ print(f"\n📊 Database Overview:")
338
+ print(f" Total Entries: {stats.get('total_entries', 0):,}")
339
+ print(f" Total Size: {stats.get('total_size_mb', 0)} MB")
340
+ print(f" Expired Entries: {stats.get('expired_entries', 0)}")
341
+
342
+ by_type = stats.get('by_type', {})
343
+ if by_type:
344
+ print(f"\n📁 By Data Type:")
345
+ for data_type, count in by_type.items():
346
+ print(f" {data_type}: {count:,}")
347
+
348
+
349
+ def example_integration_workflow():
350
+ """Example: Complete workflow combining all modules"""
351
+ print("\n" + "="*60)
352
+ print("EXAMPLE 8: Complete Integration Workflow")
353
+ print("="*60)
354
+
355
+ ticker = 'AAPL'
356
+ today = datetime.now().date().isoformat()
357
+
358
+ # 1. Check calendar for upcoming events
359
+ print(f"\n1️⃣ Checking calendar for {ticker}...")
360
+ calendar_adapter = CalendarAdapter()
361
+ earnings = calendar_adapter.get_earnings_events(ticker)
362
+ print(f" Found {len(earnings)} upcoming earnings events")
363
+
364
+ # 2. Save fundamental analysis
365
+ print(f"\n2️⃣ Saving fundamental analysis for {ticker}...")
366
+ fundamental_adapter = FundamentalAdapter()
367
+
368
+ decision = {
369
+ 'recommendation': 'HOLD',
370
+ 'final_score': 0.31,
371
+ 'confidence': 0.85,
372
+ 'reasoning': ['Strong ROIC but low FCF yield'],
373
+ 'key_metrics': {'roic': 73.8, 'fcf_yield': 2.36}
374
+ }
375
+
376
+ fundamental_adapter.save_investment_decision(today, ticker, decision)
377
+ print(" ✓ Decision saved")
378
+
379
+ # 3. Check news sentiment
380
+ print(f"\n3️⃣ Saving news sentiment for {ticker}...")
381
+ news_adapter = NewsAdapter()
382
+
383
+ sentiment = {
384
+ 'model': 'finbert-tone',
385
+ 'sentiment': 'neutral',
386
+ 'score': 0.55
387
+ }
388
+
389
+ news_adapter.save_sentiment_analysis(today, ticker, sentiment)
390
+ print(" ✓ Sentiment saved")
391
+
392
+ # 4. Save technical signal
393
+ print(f"\n4️⃣ Saving technical signal for {ticker}...")
394
+ technical_adapter = TechnicalAnalysisAdapter()
395
+
396
+ signal = {
397
+ 'signal_type': 'HOLD',
398
+ 'strength': 0.60,
399
+ 'triggers': ['Neutral RSI', 'No clear pattern']
400
+ }
401
+
402
+ technical_adapter.save_trading_signal(today, ticker, signal)
403
+ print(" ✓ Signal saved")
404
+
405
+ # 5. Comprehensive analysis
406
+ print(f"\n5️⃣ Retrieving comprehensive analysis for {ticker}...")
407
+ db = LocalDatabase()
408
+ all_data = db.query(ticker=ticker, date_from=today)
409
+
410
+ print(f"\n📊 Complete {ticker} Analysis ({today}):")
411
+ print(f" Total data points: {len(all_data)}")
412
+
413
+ for entry in all_data:
414
+ data_type = entry.data_type
415
+ if data_type == 'fundamental_analysis':
416
+ rec = entry.data.get('recommendation', 'N/A')
417
+ print(f" 💰 Fundamental: {rec}")
418
+ elif data_type == 'news':
419
+ sent = entry.data.get('sentiment', 'N/A')
420
+ print(f" 📰 News Sentiment: {sent}")
421
+ elif data_type == 'technical_analysis':
422
+ sig = entry.data.get('signal_type', 'N/A')
423
+ print(f" 📉 Technical Signal: {sig}")
424
+
425
+
426
+ def run_all_examples():
427
+ """Run all examples"""
428
+ print("\n" + "="*60)
429
+ print("LOCAL DATABASE SYSTEM - EXAMPLE USAGE")
430
+ print("="*60)
431
+
432
+ try:
433
+ example_calendar_scraper()
434
+ example_fundamental_analysis()
435
+ example_news_scraper()
436
+ example_technical_analysis()
437
+ example_batch_operations()
438
+ example_query_operations()
439
+ example_database_stats()
440
+ example_integration_workflow()
441
+
442
+ print("\n" + "="*60)
443
+ print("✓ All examples completed successfully!")
444
+ print("="*60 + "\n")
445
+
446
+ except Exception as e:
447
+ print(f"\n❌ Error running examples: {e}")
448
+ import traceback
449
+ traceback.print_exc()
450
+
451
+
452
+ if __name__ == "__main__":
453
+ run_all_examples()
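One caveat before running these examples: `LocalDatabase.save()` silently skips any ticker that is not in the `available_tickers` whitelist (economic events excepted), so the tickers used above must be seeded first. A minimal sketch:

from db.local_database import LocalDatabase

db = LocalDatabase()
for t in ["AAPL", "GOOGL", "MSFT", "AMZN", "NVDA", "TSLA"]:
    db.add_ticker(t)  # upsert into the whitelist
run_all_examples()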
src/db/get_event_from_db.py ADDED
@@ -0,0 +1,49 @@
1
+ from datetime import datetime
2
+ from db.local_database import LocalDatabase, DataType
9
+
10
+ def get_events(data_type: str, date_str: str):
11
+ """Retrieve event data from the local database for a given type and date (first matching entry only)"""
12
+ db = LocalDatabase()
13
+ entries = db.query(date_from=date_str, data_type=data_type)
14
+ if entries:
15
+ return entries[0].data # first matching entry only
17
+ return None
18
+
19
+ def get_event_ticker(data_type: str, ticker: str, date_str: str):
20
+ """Retrieve event data from the local database for a given type, ticker, and date"""
21
+ db = LocalDatabase()
22
+ entry = db.get(date_str=date_str, data_type=data_type, ticker=ticker)
23
+ if entry:
24
+ return entry.data
25
+ return None
26
+
27
+
28
+
29
+ if __name__ == "__main__":
30
+ # Example usage
31
+ date_str = datetime.now().strftime("%Y-%m-%d")
32
+ ticker = "ABNB"
33
+
34
+ event_types = [
35
+ DataType.DIVIDENDS.value,
36
+ DataType.STOCK_SPLIT.value,
37
+ DataType.IPO.value,
38
+ DataType.EARNINGS.value,
39
+ DataType.ECONOMIC_EVENTS.value,
40
+ DataType.FUNDAMENTAL.value,
41
+ DataType.NEWS.value,
42
+ DataType.TECHNICAL_ANALYSIS.value
43
+ ]
44
+
45
+ for event_type in event_types:
46
+ event_data = get_events(event_type, date_str)
48
+ print(f"Event Type: {event_type}, Date: {date_str}")
49
+ print(f"Data: {event_data}\n")
src/db/isrgrootx1.pem ADDED
@@ -0,0 +1,31 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
src/db/local_database.py ADDED
@@ -0,0 +1,1217 @@
1
+ """
2
+ Advanced Local Database Manager for Stock Alchemist
3
+ Stores entries organized by date, type, and ticker
4
+ Uses MySQL/TiDB JSON columns for storage, with optional gzip-compressed JSON files on disk
5
+ """
6
+
7
+ import json
8
+ import mysql.connector
9
+ from mysql.connector import Error
10
+ from datetime import date, datetime, timedelta
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional, Union
13
+ from dataclasses import dataclass, asdict, field
14
+ from enum import Enum
15
+ import hashlib
16
+ import gzip
17
+ import numpy as np
18
+
19
+
20
+ class DataType(Enum):
21
+ """Supported data types - simplified naming"""
22
+ # Calendar events (no prefix needed)
23
+ EARNINGS = "earnings"
24
+ IPO = "ipo"
25
+ STOCK_SPLIT = "stock_split"
26
+ DIVIDENDS = "dividends"
27
+ ECONOMIC_EVENTS = "economic_events"
28
+ # Other data types
29
+ FUNDAMENTAL = "fundamental_analysis"
30
+ NEWS = "news"
31
+ TECHNICAL_ANALYSIS = "technical_analysis"
32
+
33
+
34
+ @dataclass
35
+ class DatabaseEntry:
36
+ """Base class for database entries"""
37
+ date: str # ISO format YYYY-MM-DD
38
+ data_type: str # DataType enum value
39
+ ticker: str
40
+ data: Dict[str, Any]
41
+ created_at: str = field(default_factory=lambda: datetime.now().isoformat())
42
+ updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
43
+ expiry_date: Optional[str] = None
44
+ metadata: Dict[str, Any] = field(default_factory=dict)
45
+
46
+ def to_dict(self):
47
+ """Convert to dictionary"""
48
+ return asdict(self)
49
+
50
+ @classmethod
51
+ def from_dict(cls, data: Dict):
52
+ """Create from dictionary"""
53
+ return cls(**data)
54
+
55
+ def generate_key(self):
56
+ """
57
+ Generate unique key for this entry
58
+ For calendar events, includes execution_date/ex_date to prevent duplicates
59
+ """
60
+ # For calendar events, include the actual event date to ensure uniqueness
61
+ if self.data_type in ['earnings', 'ipo', 'stock_split', 'dividends']:
62
+ event_date = (self.data.get('execution_date') or
63
+ self.data.get('ex_date') or
64
+ self.data.get('date') or
65
+ self.date)
66
+ key_string = f"{self.data_type}_{self.ticker}_{event_date}"
67
+ else:
68
+ key_string = f"{self.date}_{self.data_type}_{self.ticker}"
69
+
70
+ return hashlib.md5(key_string.encode()).hexdigest()
71
+
72
+
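The key scheme deserves a concrete illustration: for calendar events the key is derived from the event's own date, so re-scraping the same announcement on different days upserts a single row instead of accumulating duplicates. A sketch with hypothetical values:

entry = DatabaseEntry(
    date="2025-11-10",                      # scrape date
    data_type=DataType.EARNINGS.value,
    ticker="AAPL",
    data={"execution_date": "2025-11-20"},  # actual event date
)
print(entry.generate_key())  # md5 of "earnings_AAPL_2025-11-20", stable across scrape dates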
73
+ class LocalDatabase:
74
+ """
75
+ Advanced local database manager with MySQL index and JSON file storage
76
+
77
+ Features:
78
+ - MySQL metadata index for fast queries
79
+ - JSON files for actual data storage
80
+ - Optional gzip compression for large data
81
+ - Automatic expiry and cleanup
82
+ - Date/Type/Ticker indexing
83
+ - Batch operations support
84
+ """
85
+
86
+ def __init__(self, db_dir: str = "database", compress: bool = False):
87
+ """
88
+ Initialize database manager
89
+
90
+ Args:
91
+ db_dir: Root directory for database storage
92
+ compress: Whether to compress JSON files with gzip
93
+ """
94
+ self.db_dir = Path(db_dir)
95
+ self.db_dir.mkdir(exist_ok=True)
96
+
97
+ # Create subdirectories for organization
98
+ self.data_dir = self.db_dir / "data"
99
+ self.data_dir.mkdir(exist_ok=True)
100
+
101
+ # Load environment variables
102
+ from dotenv import load_dotenv
103
+ import os
104
+ load_dotenv()
105
+
106
+ # MySQL connection parameters from environment variables
107
+ self.mysql_config = {
108
+ 'host': os.getenv('DB_HOST', 'localhost').strip(),
109
+ 'user': os.getenv('DB_USERNAME', 'root').strip(),
110
+ 'password': os.getenv('DB_PASSWORD', '').strip(),
111
+ 'database': os.getenv('DB_DATABASE', 'gotti').strip(),
112
+ 'port': int(os.getenv('DB_PORT', 3306))
113
+ }
114
+
115
+ # SSL Configuration for TiDB
116
+ ssl_ca = os.getenv('DB_SSL_CA')
117
+ if ssl_ca:
118
+ # Resolve relative path if needed
119
+ if not os.path.isabs(ssl_ca):
120
+ # Relative paths (e.g. src/db/isrgrootx1.pem) are resolved against the project root
121
+ # This file lives at src/db/local_database.py,
122
+ # so the project root is three parents up from __file__
123
+ project_root = Path(__file__).parent.parent.parent
124
+ ssl_ca_path = project_root / ssl_ca
125
+ else:
+ ssl_ca_path = Path(ssl_ca) # absolute path given; use it as-is
126
+ if ssl_ca_path.exists():
127
+ self.mysql_config['ssl_ca'] = str(ssl_ca_path)
128
+ self.mysql_config['ssl_verify_cert'] = True
129
+ self.mysql_config['ssl_verify_identity'] = True
130
+ else:
131
+ print(f"⚠️ SSL CA file not found at {ssl_ca_path}")
132
+
133
+ self.compress = compress
134
+
135
+ self._init_database()
136
+
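All connection settings come from environment variables; a sketch of the expected names (the values here are placeholders, not real credentials):

import os

os.environ.setdefault("DB_HOST", "127.0.0.1")
os.environ.setdefault("DB_USERNAME", "root")
os.environ.setdefault("DB_PASSWORD", "secret")
os.environ.setdefault("DB_DATABASE", "gotti")
os.environ.setdefault("DB_PORT", "3306")
os.environ.setdefault("DB_SSL_CA", "src/db/isrgrootx1.pem")  # optional; enables TLS verification for TiDB

db = LocalDatabase()  # connects and creates tables on construction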
137
+ def _create_connection(self):
138
+ """Create and return a MySQL database connection"""
139
+ try:
140
+ connection = mysql.connector.connect(**self.mysql_config)
141
+ return connection
142
+ except Error as e:
143
+ print(f"❌ Error connecting to MySQL: {e}")
144
+ return None
145
+
146
+ def _get_table_name(self, data_type: str) -> str:
147
+ """Determine which table to use based on data_type"""
148
+ # Calendar events
149
+ if data_type in ['earnings', 'ipo', 'stock_split', 'dividends', 'economic_events']:
150
+ return 'calendar'
151
+ # News
152
+ elif data_type == 'news':
153
+ return 'news'
154
+ # Fundamental analysis
155
+ elif data_type == 'fundamental_analysis':
156
+ return 'fundamental_analysis'
157
+ else:
158
+ raise ValueError(f"Unknown data type: {data_type}")
159
+
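This routing also defines what can be persisted at all: `technical_analysis` has no table mapping, so `TechnicalAnalysisAdapter.save_*` currently hits the `ValueError` branch and `save()` returns False. A quick illustration (assumes a constructed instance `db`):

db._get_table_name("earnings")              # -> "calendar"
db._get_table_name("dividends")             # -> "calendar"
db._get_table_name("news")                  # -> "news"
db._get_table_name("fundamental_analysis")  # -> "fundamental_analysis"
db._get_table_name("technical_analysis")    # raises ValueError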
160
+ def _init_database(self):
161
+ """Initialize MySQL tables - three separate tables by data category"""
162
+ conn = self._create_connection()
163
+ if not conn:
164
+ raise Exception("Failed to connect to MySQL database")
165
+
166
+ cursor = conn.cursor()
167
+
168
+ try:
169
+ # Create calendar table
170
+ cursor.execute('''
171
+ CREATE TABLE IF NOT EXISTS calendar (
172
+ entry_key VARCHAR(32) PRIMARY KEY,
173
+ date DATE NOT NULL,
174
+ event_type VARCHAR(50) NOT NULL,
175
+ ticker VARCHAR(20) NOT NULL,
176
+ data JSON NOT NULL,
177
+ created_at DATETIME NOT NULL,
178
+ updated_at DATETIME NOT NULL,
179
+ expiry_date DATE,
180
+ metadata JSON,
181
+ execution_date DATE,
182
+ INDEX idx_date (date),
183
+ INDEX idx_event_type (event_type),
184
+ INDEX idx_ticker (ticker),
185
+ INDEX idx_date_event (date, event_type),
186
+ INDEX idx_ticker_event (ticker, event_type)
187
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
188
+ ''')
189
+
190
+ # Create news table
191
+ cursor.execute('''
192
+ CREATE TABLE IF NOT EXISTS news (
193
+ entry_key VARCHAR(32) PRIMARY KEY,
194
+ date DATE NOT NULL,
195
+ ticker VARCHAR(20) NOT NULL,
196
+ data JSON NOT NULL,
197
+ created_at DATETIME NOT NULL,
198
+ updated_at DATETIME NOT NULL,
199
+ expiry_date DATE,
200
+ metadata JSON,
201
+ INDEX idx_date (date),
202
+ INDEX idx_ticker (ticker),
203
+ INDEX idx_date_ticker (date, ticker)
204
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
205
+ ''')
206
+
207
+ # Create fundamental_analysis table
208
+ cursor.execute('''
209
+ CREATE TABLE IF NOT EXISTS fundamental_analysis (
210
+ entry_key VARCHAR(32) PRIMARY KEY,
211
+ date DATE NOT NULL,
212
+ ticker VARCHAR(20) NOT NULL,
213
+ data JSON NOT NULL,
214
+ created_at DATETIME NOT NULL,
215
+ updated_at DATETIME NOT NULL,
216
+ expiry_date DATE,
217
+ metadata JSON,
218
+ INDEX idx_date (date),
219
+ INDEX idx_ticker (ticker),
220
+ INDEX idx_date_ticker (date, ticker)
221
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
222
+ ''')
223
+
224
+ # Create available_tickers table - whitelist of allowed tickers
225
+ cursor.execute('''
226
+ CREATE TABLE IF NOT EXISTS available_tickers (
227
+ ticker VARCHAR(20) PRIMARY KEY,
228
+ name VARCHAR(255),
229
+ exchange VARCHAR(50),
230
+ sector VARCHAR(100),
231
+ is_active BOOLEAN DEFAULT TRUE,
232
+ added_at DATETIME NOT NULL,
233
+ updated_at DATETIME NOT NULL,
234
+ metadata JSON,
235
+ INDEX idx_is_active (is_active),
236
+ INDEX idx_exchange (exchange),
237
+ INDEX idx_sector (sector)
238
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
239
+ ''')
240
+
241
+ # Create signals table - tracks actionable ticker signals
242
+ cursor.execute('''
243
+ CREATE TABLE IF NOT EXISTS signals (
244
+ signal_id VARCHAR(32) PRIMARY KEY,
245
+ ticker VARCHAR(20) NOT NULL,
246
+ signal_date DATE NOT NULL,
247
+ signal_position VARCHAR(10) NOT NULL,
248
+ calendar_event_keys JSON,
249
+ news_keys JSON,
250
+ fundamental_analysis_key VARCHAR(32),
251
+ sentiment JSON,
252
+ created_at DATETIME NOT NULL,
253
+ updated_at DATETIME NOT NULL,
254
+ metadata JSON,
255
+ INDEX idx_ticker (ticker),
256
+ INDEX idx_signal_date (signal_date),
257
+ INDEX idx_ticker_date (ticker, signal_date),
258
+ INDEX idx_created_at (created_at)
259
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
260
+ ''')
261
+
262
+ # Add execution_date column to calendar table if it doesn't exist
263
+ try:
264
+ cursor.execute("ALTER TABLE calendar ADD COLUMN execution_date DATE AFTER date")
265
+ print("✅ Added execution_date column to calendar table")
266
+ except Error as e:
267
+ if e.errno != 1060: # Error 1060 = duplicate column name, i.e. column already exists
268
+ raise # surface anything other than "column already exists"
269
+
270
+ # Add sentiment column to signals table if it doesn't exist
271
+ try:
272
+ cursor.execute("ALTER TABLE signals ADD COLUMN sentiment JSON AFTER fundamental_analysis_key")
273
+ print("✅ Added sentiment column to signals table")
274
+ except Error as e:
275
+ if e.errno != 1060: # Error 1060 = duplicate column name, i.e. column already exists
276
+ raise # surface anything other than "column already exists"
277
+
278
+ # Add signal_position column to signals table if it doesn't exist
279
+ try:
280
+ cursor.execute("ALTER TABLE signals ADD COLUMN signal_position VARCHAR(10) AFTER signal_date")
281
+ print("✅ Added signal_position column to signals table")
282
+ except Error as e:
283
+ if e.errno != 1060: # Error 1060 = duplicate column name, i.e. column already exists
284
+ raise # surface anything other than "column already exists"
285
+
286
+ conn.commit()
287
+ print("✅ MySQL database tables initialized successfully (calendar, news, fundamental_analysis, available_tickers, signals)")
288
+ except Error as e:
289
+ print(f"❌ Error initializing database: {e}")
290
+ # Try without IF NOT EXISTS for MySQL versions that don't support it
291
+ try:
292
+ cursor.execute("SHOW COLUMNS FROM calendar LIKE 'execution_date'")
293
+ if cursor.fetchone() is None:
294
+ cursor.execute("ALTER TABLE calendar ADD COLUMN execution_date DATE AFTER date")
295
+ conn.commit()
296
+ print("✅ Added execution_date column to calendar table")
297
+ except Exception as alter_error:
298
+ print(f"⚠️ Could not add execution_date column: {alter_error}")
299
+
300
+ # Try adding sentiment column for older MySQL versions
301
+ try:
302
+ cursor.execute("SHOW COLUMNS FROM signals LIKE 'sentiment'")
303
+ if cursor.fetchone() is None:
304
+ cursor.execute("ALTER TABLE signals ADD COLUMN sentiment JSON AFTER fundamental_analysis_key")
305
+ conn.commit()
306
+ print("✅ Added sentiment column to signals table")
307
+ except Exception as alter_error:
308
+ print(f"⚠️ Could not add sentiment column: {alter_error}")
309
+
310
+ # Try adding signal_position column for older MySQL versions
311
+ try:
312
+ cursor.execute("SHOW COLUMNS FROM signals LIKE 'signal_position'")
313
+ if cursor.fetchone() is None:
314
+ cursor.execute("ALTER TABLE signals ADD COLUMN signal_position VARCHAR(10) AFTER signal_date")
315
+ conn.commit()
316
+ print("✅ Added signal_position column to signals table")
317
+ except Exception as alter_error:
318
+ print(f"⚠️ Could not add signal_position column: {alter_error}")
319
+ finally:
320
+ cursor.close()
321
+ conn.close()
322
+
323
+ def _generate_file_path(self, entry_key: str, data_type: str, date_str: str) -> Path:
324
+ """Generate organized file path for data storage"""
325
+ # Organize by type/year/month/
326
+ year_month = datetime.fromisoformat(date_str).strftime("%Y/%m")
327
+ type_dir = self.data_dir / data_type / year_month
328
+ type_dir.mkdir(parents=True, exist_ok=True)
329
+
330
+ extension = ".json.gz" if self.compress else ".json"
331
+ return type_dir / f"{entry_key}{extension}"
332
+
333
+ def _write_json(self, file_path: Path, data: Dict):
334
+ """Write JSON data with optional compression"""
335
+ json_str = json.dumps(data, indent=2, default=str)
336
+
337
+ if self.compress:
338
+ with gzip.open(file_path, 'wt', encoding='utf-8') as f:
339
+ f.write(json_str)
340
+ else:
341
+ with open(file_path, 'w', encoding='utf-8') as f:
342
+ f.write(json_str)
343
+
344
+ def _read_json(self, file_path: Path, compressed: bool) -> Dict:
345
+ """Read JSON data with optional decompression"""
346
+ if compressed:
347
+ with gzip.open(file_path, 'rt', encoding='utf-8') as f:
348
+ return json.load(f)
349
+ else:
350
+ with open(file_path, 'r', encoding='utf-8') as f:
351
+ return json.load(f)
352
+
353
+ def _clean_data_for_json(self, data: Any) -> Any:
354
+ """
355
+ Recursively clean data to ensure it's JSON serializable.
356
+ - Converts NaN/Infinity to None
357
+ - Converts numpy types to native python types
358
+ """
359
+ if isinstance(data, dict):
360
+ return {k: self._clean_data_for_json(v) for k, v in data.items()}
361
+ elif isinstance(data, list):
362
+ return [self._clean_data_for_json(v) for v in data]
363
+ elif isinstance(data, float):
364
+ if np.isnan(data) or np.isinf(data):
365
+ return None
366
+ return float(data)
367
+ elif isinstance(data, np.integer):
368
+ return int(data)
369
+ elif isinstance(data, np.floating):
370
+ if np.isnan(data) or np.isinf(data):
371
+ return None
372
+ return float(data)
373
+ elif isinstance(data, np.ndarray):
374
+ return self._clean_data_for_json(data.tolist())
375
+ return data
376
+
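`_clean_data_for_json` exists because NaN/Infinity and numpy scalars are not valid values for MySQL's JSON column type. A sketch of its effect (assumes an instance `db`):

import numpy as np

raw = {"rsi": float("nan"), "volume": np.int64(45_000_000),
       "sma": np.array([450.25, np.inf])}
db._clean_data_for_json(raw)
# -> {"rsi": None, "volume": 45000000, "sma": [450.25, None]}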
377
+ def add_ticker(self, ticker: str, name: str = None, exchange: str = None,
378
+ sector: str = None, metadata: Dict = None) -> bool:
379
+ """
380
+ Add a ticker to the available_tickers whitelist.
381
+
382
+ Args:
383
+ ticker: Ticker symbol
384
+ name: Company name
385
+ exchange: Exchange name (e.g., 'NASDAQ', 'NYSE')
386
+ sector: Company sector
387
+ metadata: Additional metadata as JSON
388
+
389
+ Returns:
390
+ True if successful, False otherwise
391
+ """
392
+ conn = self._create_connection()
393
+ if not conn:
394
+ return False
395
+
396
+ cursor = conn.cursor()
397
+ try:
398
+ now = datetime.now()
399
+ cursor.execute('''
400
+ INSERT INTO available_tickers
401
+ (ticker, name, exchange, sector, is_active, added_at, updated_at, metadata)
402
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
403
+ ON DUPLICATE KEY UPDATE
404
+ name = VALUES(name),
405
+ exchange = VALUES(exchange),
406
+ sector = VALUES(sector),
407
+ is_active = VALUES(is_active),
408
+ updated_at = VALUES(updated_at),
409
+ metadata = VALUES(metadata)
410
+ ''', (
411
+ ticker.upper(),
412
+ name,
413
+ exchange,
414
+ sector,
415
+ True,
416
+ now,
417
+ now,
418
+ json.dumps(metadata) if metadata else None
419
+ ))
420
+ conn.commit()
421
+ return True
422
+ except Error as e:
423
+ print(f"❌ Error adding ticker {ticker}: {e}")
424
+ return False
425
+ finally:
426
+ cursor.close()
427
+ conn.close()
428
+
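`add_ticker` upserts into the whitelist, and `is_ticker_available` is the gate that `save()` consults before writing anything. A minimal sketch:

db.add_ticker("AAPL", name="Apple Inc.", exchange="NASDAQ", sector="Technology")
db.is_ticker_available("AAPL")   # True
db.remove_ticker("AAPL")         # soft delete: sets is_active = FALSE
db.is_ticker_available("AAPL")   # False -> save() will now skip AAPL entries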
429
+ def get_macroeconomic_indicators(self) -> Dict[str, Any]:
430
+ """
431
+ Retrieve the most recent macroeconomic indicators row (assumes a macroeconomic_indicators table exists; _init_database does not create it).
432
+
433
+ Returns:
434
+ Dictionary of macroeconomic indicators
435
+ """
436
+ conn = self._create_connection()
437
+ if not conn:
438
+ return {}
439
+
440
+ cursor = conn.cursor()
441
+ try:
442
+ cursor.execute('''
443
+ SELECT data
444
+ FROM macroeconomic_indicators
445
+ ORDER BY date DESC
446
+ LIMIT 1
447
+ ''')
448
+ row = cursor.fetchone()
449
+ if row:
450
+ return json.loads(row[0])
451
+ return {}
452
+ except Error as e:
453
+ print(f"❌ Error fetching macroeconomic indicators: {e}")
454
+ return {}
455
+ finally:
456
+ cursor.close()
457
+ conn.close()
458
+
459
+ def remove_ticker(self, ticker: str) -> bool:
460
+ """
461
+ Deactivate a ticker (soft delete - sets is_active to False).
462
+
463
+ Args:
464
+ ticker: Ticker symbol to deactivate
465
+
466
+ Returns:
467
+ True if successful, False otherwise
468
+ """
469
+ conn = self._create_connection()
470
+ if not conn:
471
+ return False
472
+
473
+ cursor = conn.cursor()
474
+ try:
475
+ cursor.execute(
476
+ "UPDATE available_tickers SET is_active = FALSE, updated_at = %s WHERE ticker = %s",
477
+ (datetime.now(), ticker.upper())
478
+ )
479
+ conn.commit()
480
+ return cursor.rowcount > 0
481
+ except Error as e:
482
+ print(f"❌ Error removing ticker {ticker}: {e}")
483
+ return False
484
+ finally:
485
+ cursor.close()
486
+ conn.close()
487
+
488
+ def get_all_available_tickers(self) -> List[str]:
489
+ """
490
+ Get all active tickers from the whitelist.
491
+
492
+ Returns:
493
+ List of ticker symbols
494
+ """
495
+ conn = self._create_connection()
496
+ if not conn:
497
+ return []
498
+
499
+ cursor = conn.cursor()
500
+ try:
501
+ cursor.execute("SELECT ticker FROM available_tickers WHERE is_active = TRUE ORDER BY ticker")
502
+ return [row[0] for row in cursor.fetchall()]
503
+ except Error as e:
504
+ print(f"❌ Error fetching available tickers: {e}")
505
+ return []
506
+ finally:
507
+ cursor.close()
508
+ conn.close()
509
+
510
+ def is_ticker_available(self, ticker: str) -> bool:
511
+ """
512
+ Check if ticker is in the available_tickers whitelist.
513
+
514
+ Args:
515
+ ticker: Ticker symbol to check
516
+ Returns:
517
+ True if ticker is available and active, False otherwise
518
+ """
519
+ conn = self._create_connection()
520
+ if not conn:
521
+ return False
522
+ cursor = conn.cursor()
523
+ try:
524
+ cursor.execute(
525
+ "SELECT is_active FROM available_tickers WHERE ticker = %s",
526
+ (ticker.upper(),)
527
+ )
528
+ result = cursor.fetchone()
529
+
530
+ if result and result[0]: # Ticker exists and is_active = True
531
+ return True
532
+ return False
533
+ except Error as e:
534
+ print(f"❌ Error checking ticker availability for {ticker}: {e}")
535
+ return False
536
+ finally:
537
+ cursor.close()
538
+ conn.close()
539
+
540
+ def save(self, entry: DatabaseEntry, expiry_days: Optional[int] = None) -> bool:
541
+ """
542
+ Save entry to database. Updates existing entry if duplicate is found.
543
+ IMPORTANT: Checks if ticker is in available_tickers whitelist before saving.
544
+
545
+ Args:
546
+ entry: DatabaseEntry to save
547
+ expiry_days: Optional expiry in days
548
+
549
+ Returns:
550
+ True if successful, False if ticker not available or save fails
551
+ """
552
+ try:
553
+ # CRITICAL: Check if ticker is in the available_tickers whitelist
554
+ # Skip check for economic events as they use country names as tickers
555
+ if entry.data_type != DataType.ECONOMIC_EVENTS.value and not self.is_ticker_available(entry.ticker):
556
+ print(f"⚠️ Skipping {entry.data_type} for {entry.ticker} - ticker not in available_tickers whitelist")
557
+ return False
558
+
559
+ entry_key = entry.generate_key()
560
+
561
+ # Get the appropriate table name
562
+ table_name = self._get_table_name(entry.data_type)
563
+
564
+ # Check if entry already exists
565
+ conn = self._create_connection()
566
+ if not conn:
567
+ return False
568
+
569
+ cursor = conn.cursor()
570
+
571
+ cursor.execute(f'SELECT created_at FROM {table_name} WHERE entry_key = %s', (entry_key,))
572
+ existing = cursor.fetchone()
573
+
574
+ # Preserve original created_at if updating
575
+ if existing:
576
+ entry.created_at = str(existing[0])
577
+
578
+ # Update the updated_at timestamp
579
+ entry.updated_at = datetime.now().isoformat()
580
+
581
+ # Calculate expiry date if specified
582
+ if expiry_days:
583
+ expiry_date = (datetime.now() + timedelta(days=expiry_days)).date().isoformat()
584
+ entry.expiry_date = expiry_date
585
+
586
+ # Store data directly as JSON in database
587
+ # Different INSERT statement based on table structure
588
+ if table_name == 'calendar':
589
+ # Calendar table has event_type column
590
+ cursor.execute('''
591
+ INSERT INTO calendar
592
+ (entry_key, date, event_type, ticker, data, created_at, updated_at,
593
+ expiry_date, metadata)
594
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
595
+ ON DUPLICATE KEY UPDATE
596
+ data = VALUES(data),
597
+ updated_at = VALUES(updated_at),
598
+ expiry_date = VALUES(expiry_date),
599
+ metadata = VALUES(metadata)
600
+ ''', (
601
+ entry_key,
602
+ entry.date,
603
+ entry.data_type, # event_type (earnings, ipo, etc.)
604
+ entry.ticker,
605
+ json.dumps(self._clean_data_for_json(entry.data), default=str),
606
+ entry.created_at,
607
+ entry.updated_at,
608
+ entry.expiry_date,
609
+ json.dumps(entry.metadata)
610
+ ))
611
+ else:
612
+ # News and fundamental_analysis tables don't have event_type
613
+ cursor.execute(f'''
614
+ INSERT INTO {table_name}
615
+ (entry_key, date, ticker, data, created_at, updated_at,
616
+ expiry_date, metadata)
617
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
618
+ ON DUPLICATE KEY UPDATE
619
+ data = VALUES(data),
620
+ updated_at = VALUES(updated_at),
621
+ expiry_date = VALUES(expiry_date),
622
+ metadata = VALUES(metadata)
623
+ ''', (
624
+ entry_key,
625
+ entry.date,
626
+ entry.ticker,
627
+ json.dumps(self._clean_data_for_json(entry.data), default=str),
628
+ entry.created_at,
629
+ entry.updated_at,
630
+ entry.expiry_date,
631
+ json.dumps(entry.metadata)
632
+ ))
633
+
634
+ conn.commit()
635
+ return True
636
+
637
+ except Exception as e:
638
+ print(f"Error saving entry {entry.ticker}: {e}")
639
+ return False
640
+ finally:
641
+ if 'cursor' in locals():
642
+ cursor.close()
643
+ if 'conn' in locals() and conn:
644
+ conn.close()
645
+
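For reference, a hedged sketch of a single `save()` call; the `DatabaseEntry` keyword arguments mirror the attributes this method reads (date, data_type, ticker, data, metadata), and the payload values are invented:

```python
# Hypothetical save sketch; field values are illustrative only.
from src.db.local_database import LocalDatabase, DatabaseEntry

db = LocalDatabase()
entry = DatabaseEntry(
    date="2025-11-15",
    data_type="earnings",      # routed to the calendar table
    ticker="AAPL",
    data={"eps_estimate": 1.62, "time": "amc"},
    metadata={"source": "example"},
)
ok = db.save(entry, expiry_days=7)  # row expires a week from now
print("saved" if ok else "skipped or failed")
```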
646
+ def save_batch(self, entries: List[DatabaseEntry], expiry_days: Optional[int] = None) -> int:
647
+ """
648
+ Save multiple entries in batch. Updates existing entries if duplicates are found.
649
+
650
+ Args:
651
+ entries: List of DatabaseEntry objects
652
+ expiry_days: Optional expiry in days
653
+
654
+ Returns:
655
+ Number of successfully saved entries
656
+ """
657
+ success_count = 0
658
+
659
+ conn = self._create_connection()
660
+ if not conn:
661
+ return 0
662
+
663
+ cursor = conn.cursor()
664
+
665
+ try:
666
+ for entry in entries:
667
+ try:
668
+ # CRITICAL: Check if ticker is in the available_tickers whitelist
669
+ # Skip check for economic events as they use country names as tickers
670
+ if entry.data_type != DataType.ECONOMIC_EVENTS.value and not self.is_ticker_available(entry.ticker):
671
+ print(f"⚠️ Skipping {entry.data_type} for {entry.ticker} - ticker not in available_tickers whitelist")
672
+ continue
673
+
674
+ entry_key = entry.generate_key()
675
+ table_name = self._get_table_name(entry.data_type)
676
+
677
+ # Check if entry already exists
678
+ cursor.execute(f'SELECT created_at FROM {table_name} WHERE entry_key = %s', (entry_key,))
679
+ existing = cursor.fetchone()
680
+
681
+ # Preserve original created_at if updating
682
+ if existing:
683
+ entry.created_at = str(existing[0])
684
+
685
+ # Update the updated_at timestamp
686
+ entry.updated_at = datetime.now().isoformat()
687
+
688
+ if expiry_days:
689
+ expiry_date = (datetime.now() + timedelta(days=expiry_days)).date().isoformat()
690
+ entry.expiry_date = expiry_date
691
+
692
+ # Store data directly in database - different format for calendar
693
+ if table_name == 'calendar':
694
+ cursor.execute('''
695
+ INSERT INTO calendar
696
+ (entry_key, date, event_type, ticker, data, created_at, updated_at,
697
+ expiry_date, metadata)
698
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
699
+ ON DUPLICATE KEY UPDATE
700
+ data = VALUES(data),
701
+ updated_at = VALUES(updated_at),
702
+ expiry_date = VALUES(expiry_date),
703
+ metadata = VALUES(metadata)
704
+ ''', (
705
+ entry_key,
706
+ entry.date,
707
+ entry.data_type, # event_type
708
+ entry.ticker,
709
+ json.dumps(self._clean_data_for_json(entry.data), default=str),
710
+ entry.created_at,
711
+ entry.updated_at,
712
+ entry.expiry_date,
713
+ json.dumps(entry.metadata)
714
+ ))
715
+ else:
716
+ cursor.execute(f'''
717
+ INSERT INTO {table_name}
718
+ (entry_key, date, ticker, data, created_at, updated_at,
719
+ expiry_date, metadata)
720
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
721
+ ON DUPLICATE KEY UPDATE
722
+ data = VALUES(data),
723
+ updated_at = VALUES(updated_at),
724
+ expiry_date = VALUES(expiry_date),
725
+ metadata = VALUES(metadata)
726
+ ''', (
727
+ entry_key,
728
+ entry.date,
729
+ entry.ticker,
730
+ json.dumps(self._clean_data_for_json(entry.data), default=str),
731
+ entry.created_at,
732
+ entry.updated_at,
733
+ entry.expiry_date,
734
+ json.dumps(entry.metadata)
735
+ ))
736
+
737
+ success_count += 1
738
+
739
+ except Exception as e:
740
+ print(f"Error saving entry {entry.ticker}: {e}")
741
+ continue
742
+
743
+ conn.commit()
744
+
745
+ finally:
746
+ cursor.close()
747
+ conn.close()
748
+
749
+ return success_count
750
+
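One performance caveat: `save_batch` calls `is_ticker_available` once per entry, and that helper opens and closes its own connection each time. A hypothetical pre-filtering wrapper (not part of this module) resolves the whitelist with a single query first; the `"economic_events"` string is assumed to match `DataType.ECONOMIC_EVENTS.value`:

```python
from typing import List, Optional
from src.db.local_database import LocalDatabase, DatabaseEntry

def save_batch_prefiltered(db: LocalDatabase, entries: List[DatabaseEntry],
                           expiry_days: Optional[int] = None) -> int:
    # One SELECT for the whole whitelist instead of one per entry.
    allowed = set(db.get_all_available_tickers())
    keep = [e for e in entries
            if e.data_type == "economic_events" or e.ticker in allowed]
    return db.save_batch(keep, expiry_days=expiry_days)
```

The inner per-entry check still runs for the surviving entries, so this mainly helps when many tickers are off-whitelist; folding the cached set into `save_batch` itself would remove the remaining round trips.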
751
+ def save_signal(self, ticker: str, calendar_event_keys: List[str], news_keys: List[str],
752
+ fundamental_key: str, signal_position: str, sentiment: Optional[Dict] = None) -> bool:
753
+ """Save a generated signal to the database"""
754
+ try:
755
+ conn = self._create_connection()
756
+ if not conn:
757
+ return False
758
+ cursor = conn.cursor()
759
+
760
+ signal_date = datetime.now().date().isoformat()
761
+ signal_id = hashlib.md5(f"{ticker}_{signal_date}".encode()).hexdigest()
762
+ now = datetime.now()
763
+
764
+ # Merge with existing sentiment if provided
765
+ final_sentiment = sentiment
766
+ if sentiment:
767
+ cursor.execute("SELECT sentiment FROM signals WHERE signal_id = %s", (signal_id,))
768
+ existing = cursor.fetchone()
769
+ if existing and existing[0]:
770
+ existing_sentiment = json.loads(existing[0]) if isinstance(existing[0], str) else existing[0]
771
+ if isinstance(existing_sentiment, dict):
772
+ existing_sentiment.update(sentiment)
773
+ final_sentiment = existing_sentiment
774
+
775
+ cursor.execute('''
776
+ INSERT INTO signals
777
+ (signal_id, ticker, signal_date, signal_position, calendar_event_keys, news_keys,
778
+ fundamental_analysis_key, sentiment, created_at, updated_at, metadata)
779
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
780
+ ON DUPLICATE KEY UPDATE
781
+ signal_position = VALUES(signal_position),
782
+ calendar_event_keys = VALUES(calendar_event_keys),
783
+ news_keys = VALUES(news_keys),
784
+ fundamental_analysis_key = VALUES(fundamental_analysis_key),
785
+ sentiment = VALUES(sentiment),
786
+ updated_at = VALUES(updated_at)
787
+ ''', (
788
+ signal_id,
789
+ ticker,
790
+ signal_date,
791
+ signal_position,
792
+ json.dumps(calendar_event_keys),
793
+ json.dumps(news_keys),
794
+ fundamental_key,
795
+ json.dumps(final_sentiment) if final_sentiment else None,
796
+ now,
797
+ now,
798
+ json.dumps({})
799
+ ))
800
+
801
+ conn.commit()
802
+ return True
803
+ except Error as e:
804
+ print(f"❌ Error saving signal: {e}")
805
+ return False
806
+ finally:
807
+ if 'cursor' in locals():
808
+ cursor.close()
809
+ if 'conn' in locals() and conn:  # conn may be None if the connection failed
810
+ conn.close()
811
+
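The signal key is deterministic: the md5 of `"{ticker}_{date}"` yields exactly one row per ticker per day, which is what lets repeated saves become upserts via ON DUPLICATE KEY UPDATE. A minimal illustration:

```python
import hashlib
from datetime import datetime

ticker = "AAPL"
day = datetime.now().date().isoformat()
# The same (ticker, day) pair always hashes to the same signal_id.
signal_id = hashlib.md5(f"{ticker}_{day}".encode()).hexdigest()
print(signal_id)
```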
812
+ def get_signal(self, ticker: str, date_str: Optional[str] = None) -> Optional[Dict]:
813
+ """Get signal for a ticker on a specific date (defaults to today)"""
814
+ try:
815
+ if not date_str:
816
+ date_str = datetime.now().date().isoformat()
817
+
818
+ conn = self._create_connection()
819
+ if not conn:
820
+ return None
821
+ cursor = conn.cursor(dictionary=True)
822
+
823
+ cursor.execute('''
824
+ SELECT * FROM signals
825
+ WHERE ticker = %s AND signal_date = %s
826
+ ''', (ticker, date_str))
827
+
828
+ result = cursor.fetchone()
829
+
830
+ if result:
831
+ # Parse JSON fields
832
+ for field in ['calendar_event_keys', 'news_keys', 'metadata', 'sentiment']:
833
+ if result.get(field):
834
+ if isinstance(result[field], str):
835
+ result[field] = json.loads(result[field])
836
+
837
+ return result
838
+ except Error as e:
839
+ print(f"❌ Error getting signal: {e}")
840
+ return None
841
+ finally:
842
+ if 'cursor' in locals():
843
+ cursor.close()
844
+ if 'conn' in locals() and conn:  # conn may be None if the connection failed
845
+ conn.close()
846
+
847
+ def get(self, date_str: str, data_type: str, ticker: str) -> Optional[DatabaseEntry]:
848
+ """
849
+ Retrieve entry by date, type, and ticker
850
+
851
+ Args:
852
+ date_str: Date in YYYY-MM-DD format
853
+ data_type: Data type (earnings, ipo, news, fundamental_analysis, etc.)
854
+ ticker: Stock ticker
855
+
856
+ Returns:
857
+ DatabaseEntry if found, None otherwise
858
+ """
859
+ try:
860
+ # Generate key
861
+ key_string = f"{date_str}_{data_type}_{ticker}"
862
+ entry_key = hashlib.md5(key_string.encode()).hexdigest()
863
+
864
+ # Get table name for this data type
865
+ table_name = self._get_table_name(data_type)
866
+
867
+ conn = self._create_connection()
868
+ if not conn:
869
+ return None
870
+ cursor = conn.cursor()
871
+
872
+ # Different SELECT based on table structure
873
+ if table_name == 'calendar':
874
+ cursor.execute('''
875
+ SELECT date, event_type, ticker, data, created_at, updated_at, expiry_date, metadata
876
+ FROM calendar
877
+ WHERE entry_key = %s
878
+ ''', (entry_key,))
879
+ else:
880
+ cursor.execute(f'''
881
+ SELECT date, ticker, data, created_at, updated_at, expiry_date, metadata
882
+ FROM {table_name}
883
+ WHERE entry_key = %s
884
+ ''', (entry_key,))
885
+
886
+ result = cursor.fetchone()
887
+ cursor.close()
888
+ conn.close()
889
+
890
+ if not result:
891
+ return None
892
+
893
+ # Parse result based on table structure
894
+ if table_name == 'calendar':
895
+ date_val, event_type_val, ticker_val, data_json, created_at, updated_at, expiry_date, metadata_json = result
896
+ data_type_val = event_type_val # event_type is the data_type
897
+ else:
898
+ date_val, ticker_val, data_json, created_at, updated_at, expiry_date, metadata_json = result
899
+ data_type_val = data_type # Use the data_type parameter
900
+
901
+ # Check if expired
902
+ if expiry_date:
903
+ if str(expiry_date) < datetime.now().date().isoformat():
904
+ return None
905
+
906
+ # Parse JSON data from database
907
+ data_dict = json.loads(data_json) if isinstance(data_json, str) else data_json
908
+ metadata_dict = json.loads(metadata_json) if isinstance(metadata_json, str) else (metadata_json or {})
909
+
910
+ # Create DatabaseEntry
911
+ entry = DatabaseEntry(
912
+ date=str(date_val),
913
+ data_type=str(data_type_val),
914
+ ticker=str(ticker_val),
915
+ data=data_dict,
916
+ created_at=str(created_at),
917
+ updated_at=str(updated_at),
918
+ expiry_date=str(expiry_date) if expiry_date else None,
919
+ metadata=metadata_dict
920
+ )
921
+ return entry
922
+
923
+ except Exception as e:
924
+ print(f"Error retrieving entry: {e}")
925
+ return None
926
+
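Lookups never need a stored key: `get()` rebuilds the same md5 of `"{date}_{data_type}_{ticker}"` that was used at save time. A round-trip sketch (values illustrative):

```python
import hashlib
from src.db.local_database import LocalDatabase

# The key is derived, so callers only supply the three natural fields.
key = hashlib.md5("2025-11-15_earnings_AAPL".encode()).hexdigest()
print("derived key:", key)

db = LocalDatabase()
entry = db.get("2025-11-15", "earnings", "AAPL")
print(entry.data if entry else "not found (or expired)")
```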
927
+ def query(self,
928
+ date_from: Optional[str] = None,
929
+ date_to: Optional[str] = None,
930
+ data_type: Optional[str] = None,
931
+ ticker: Optional[str] = None,
932
+ limit: Optional[int] = None,
933
+ include_expired: bool = False) -> List[DatabaseEntry]:
934
+ """
935
+ Query database with flexible filters across all tables
936
+
937
+ Args:
938
+ date_from: Start date (inclusive)
939
+ date_to: End date (inclusive)
940
+ data_type: Filter by data type (e.g., 'earnings', 'news', 'fundamental_analysis')
941
+ ticker: Filter by ticker
942
+ limit: Max results
943
+ include_expired: Whether to include expired entries
944
+
945
+ Returns:
946
+ List of DatabaseEntry objects
947
+ """
948
+ try:
949
+ conn = self._create_connection()
950
+ if not conn:
951
+ return []
952
+
953
+ cursor = conn.cursor()
954
+ entries = []
955
+
956
+ # Determine which tables to query
957
+ tables_to_query = []
958
+ if data_type:
959
+ # Query specific table based on data_type
960
+ table_name = self._get_table_name(data_type)
961
+ tables_to_query.append((table_name, data_type))
962
+ else:
963
+ # Query all tables
964
+ tables_to_query = [
965
+ ('calendar', None), # Will get all calendar events
966
+ ('news', 'news'),
967
+ ('fundamental_analysis', 'fundamental_analysis')
968
+ ]
969
+
970
+ # Query each table
971
+ for table_name, specific_type in tables_to_query:
972
+ params = []
973
+
974
+ if table_name == 'calendar':
975
+ # Calendar has event_type column
976
+ query = "SELECT date, event_type, ticker, data, created_at, updated_at, expiry_date, metadata FROM calendar WHERE 1=1"
977
+
978
+ if specific_type: # Specific calendar event type
979
+ query += " AND event_type = %s"
980
+ params.append(specific_type)
981
+ else:
982
+ # News and fundamental_analysis don't have event_type
983
+ query = f"SELECT date, ticker, data, created_at, updated_at, expiry_date, metadata FROM {table_name} WHERE 1=1"
984
+
985
+ if date_from:
986
+ query += " AND date >= %s"
987
+ params.append(date_from)
988
+
989
+ if date_to:
990
+ query += " AND date <= %s"
991
+ params.append(date_to)
992
+
993
+ if ticker:
994
+ query += " AND ticker = %s"
995
+ params.append(ticker)
996
+
997
+ if not include_expired:
998
+ query += " AND (expiry_date IS NULL OR expiry_date >= %s)"
999
+ params.append(datetime.now().date().isoformat())
1000
+
1001
+ query += " ORDER BY date DESC, created_at DESC"
1002
+
1003
+ if limit and len(tables_to_query) == 1:
1004
+ # Only apply limit if querying a single table
1005
+ query += f" LIMIT {limit}"
1006
+
1007
+ cursor.execute(query, tuple(params))
1008
+ results = cursor.fetchall()
1009
+
1010
+ # Parse results based on table structure
1011
+ for row in results:
1012
+ try:
1013
+ if table_name == 'calendar':
1014
+ date_val, event_type_val, ticker_val, data_json, created_at, updated_at, expiry_date, metadata_json = row
1015
+ data_type_val = event_type_val
1016
+ else:
1017
+ date_val, ticker_val, data_json, created_at, updated_at, expiry_date, metadata_json = row
1018
+ data_type_val = specific_type
1019
+
1020
+ # Parse JSON data
1021
+ data_dict = json.loads(data_json) if isinstance(data_json, str) else data_json
1022
+ metadata_dict = json.loads(metadata_json) if isinstance(metadata_json, str) else (metadata_json or {})
1023
+
1024
+ entry = DatabaseEntry(
1025
+ date=str(date_val),
1026
+ data_type=str(data_type_val),
1027
+ ticker=str(ticker_val),
1028
+ data=data_dict,
1029
+ created_at=str(created_at),
1030
+ updated_at=str(updated_at),
1031
+ expiry_date=str(expiry_date) if expiry_date else None,
1032
+ metadata=metadata_dict
1033
+ )
1034
+ entries.append(entry)
1035
+ except Exception as e:
1036
+ print(f"Error loading entry from {table_name}: {e}")
1037
+ continue
1038
+
1039
+ cursor.close()
1040
+ conn.close()
1041
+
1042
+ # Sort all entries by date and apply limit if needed
1043
+ entries.sort(key=lambda x: (x.date, x.created_at), reverse=True)
1044
+ if limit:
1045
+ entries = entries[:limit]
1046
+
1047
+ return entries
1048
+
1049
+ except Exception as e:
1050
+ print(f"Error querying database: {e}")
1051
+ return []
1052
+
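A hedged usage sketch of `query()` with the common filters (dates and ticker are illustrative):

```python
from src.db.local_database import LocalDatabase

db = LocalDatabase()
# Five most recent, non-expired AAPL news entries for November 2025.
for e in db.query(date_from="2025-11-01", date_to="2025-11-30",
                  data_type="news", ticker="AAPL", limit=5):
    print(e.date, e.ticker, list(e.data)[:3])
```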
1053
+ def delete(self, date_str: str, data_type: str, ticker: str) -> bool:
1054
+ """Delete entry by date, type, and ticker"""
1055
+ try:
1056
+ # Generate key
1057
+ key_string = f"{date_str}_{data_type}_{ticker}"
1058
+ entry_key = hashlib.md5(key_string.encode()).hexdigest()
1059
+
1060
+ table_name = self._get_table_name(data_type)
1061
+
1062
+ conn = self._create_connection()
1063
+ if not conn:
1064
+ return False
1065
+ cursor = conn.cursor()
1066
+
1067
+ cursor.execute(f'DELETE FROM {table_name} WHERE entry_key = %s', (entry_key,))
1068
+ conn.commit()
1069
+
1070
+ conn.close()
1071
+ return True
1072
+
1073
+ except Exception as e:
1074
+ print(f"Error deleting entry: {e}")
1075
+ return False
1076
+
1077
+ def clean_expired(self) -> int:
1078
+ """Remove expired entries"""
1079
+ try:
1080
+ conn = self._create_connection()
1081
+ if not conn:
1082
+ return 0
1083
+ cursor = conn.cursor()
1084
+
1085
+ total_cleaned = 0
1086
+
1087
+ for table_name in ['calendar', 'news', 'fundamental_analysis']:
1088
+ cursor.execute(f'''
1089
+ DELETE FROM {table_name}
1090
+ WHERE expiry_date IS NOT NULL AND expiry_date < %s
1091
+ ''', (datetime.now().date().isoformat(),))
1092
+ total_cleaned += cursor.rowcount
1093
+
1094
+ conn.commit()
1095
+ conn.close()
1096
+
1097
+ print(f"✓ Cleaned {total_cleaned} expired entries")
1098
+ return total_cleaned
1099
+
1100
+ except Exception as e:
1101
+ print(f"Error cleaning expired entries: {e}")
1102
+ return 0
1103
+
1104
+ def get_stats(self) -> Dict[str, Any]:
1105
+ """Get database statistics across all tables"""
1106
+ try:
1107
+ conn = self._create_connection()
1108
+ if not conn:
1109
+ return {}
1110
+ cursor = conn.cursor()
1111
+
1112
+ # Initialize counters
1113
+ total_entries = 0
1114
+ by_type = {}
1115
+ all_tickers = {}
1116
+ total_size = 0
1117
+ expired_count = 0
1118
+ min_date = None
1119
+ max_date = None
1120
+
1121
+ # Query each table
1122
+ for table_name in ['calendar', 'news', 'fundamental_analysis']:
1123
+ # Count entries
1124
+ cursor.execute(f'SELECT COUNT(*) FROM {table_name}')
1125
+ table_count = cursor.fetchone()[0]
1126
+ total_entries += table_count
1127
+
1128
+ if table_name == 'calendar':
1129
+ # Get counts by event_type
1130
+ cursor.execute('SELECT event_type, COUNT(*) FROM calendar GROUP BY event_type')
1131
+ for event_type, count in cursor.fetchall():
1132
+ by_type[event_type] = count
1133
+ else:
1134
+ # For news and fundamental_analysis, use table name as type
1135
+ by_type[table_name] = table_count
1136
+
1137
+ # Get ticker counts
1138
+ cursor.execute(f'SELECT ticker, COUNT(*) FROM {table_name} GROUP BY ticker')
1139
+ for ticker, count in cursor.fetchall():
1140
+ all_tickers[ticker] = all_tickers.get(ticker, 0) + count
1141
+
1142
+ # Get data size
1143
+ cursor.execute(f'SELECT SUM(LENGTH(data)) FROM {table_name}')
1144
+ table_size = cursor.fetchone()[0] or 0
1145
+ total_size += table_size
1146
+
1147
+ # Count expired entries
1148
+ cursor.execute(f'''
1149
+ SELECT COUNT(*) FROM {table_name}
1150
+ WHERE expiry_date IS NOT NULL AND expiry_date < %s
1151
+ ''', (datetime.now().date().isoformat(),))
1152
+ expired_count += cursor.fetchone()[0]
1153
+
1154
+ # Get date range
1155
+ cursor.execute(f'SELECT MIN(date), MAX(date) FROM {table_name}')
1156
+ table_date_range = cursor.fetchone()
1157
+ if table_date_range[0]:
1158
+ if min_date is None or table_date_range[0] < min_date:
1159
+ min_date = table_date_range[0]
1160
+ if max_date is None or table_date_range[1] > max_date:
1161
+ max_date = table_date_range[1]
1162
+
1163
+ # Get top 10 tickers
1164
+ top_tickers = dict(sorted(all_tickers.items(), key=lambda x: x[1], reverse=True)[:10])
1165
+
1166
+ conn.close()
1167
+
1168
+ stats = {
1169
+ 'total_entries': total_entries,
1170
+ 'by_type': by_type,
1171
+ 'top_tickers': top_tickers,
1172
+ 'total_size_bytes': total_size,
1173
+ 'total_size_mb': round(total_size / (1024 * 1024), 2),
1174
+ 'expired_entries': expired_count,
1175
+ 'date_range': {'from': str(min_date), 'to': str(max_date)} if min_date else None,
1176
+ 'compression': 'enabled' if self.compress else 'disabled'
1177
+ }
1178
+
1179
+ return stats
1180
+
1181
+ except Exception as e:
1182
+ print(f"Error getting stats: {e}")
1183
+ return {}
1184
+
1185
+ def clear_all(self) -> bool:
1186
+ """Clear all data (use with caution!)"""
1187
+ try:
1188
+ conn = self._create_connection()
1189
+ if not conn:
1190
+ return False
1191
+ cursor = conn.cursor()
1192
+
1193
+ # Truncate all tables
1194
+ for table_name in ['calendar', 'news', 'fundamental_analysis', 'signals']:
1195
+ cursor.execute(f'TRUNCATE TABLE {table_name}')
1196
+
1197
+ conn.commit()
1198
+ conn.close()
1199
+
1200
+ print("✓ All data cleared")
1201
+ return True
1202
+
1203
+ except Exception as e:
1204
+ print(f"Error clearing data: {e}")
1205
+ return False
1206
+
1207
+
1208
+ # Global database instance
1209
+ db_instance = None
1210
+
1211
+
1212
+ def get_database(db_dir: str = "database", compress: bool = False) -> LocalDatabase:
1213
+ """Get or create database instance"""
1214
+ global db_instance
1215
+ if db_instance is None or db_instance.db_dir != Path(db_dir):
1216
+ db_instance = LocalDatabase(db_dir=db_dir, compress=compress)
1217
+ return db_instance
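Because `get_database` caches a module-level instance, repeated calls with the same `db_dir` share one object:

```python
from src.db.local_database import get_database

db1 = get_database("database")
db2 = get_database("database")
assert db1 is db2  # reused until a different db_dir is requested
```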
src/db/migrate_local_to_tidb.py ADDED
@@ -0,0 +1,132 @@
1
+ import mysql.connector
2
+ from mysql.connector import Error
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from pathlib import Path
6
+ import sys
7
+
8
+ # Add src to path to import if needed, though we'll use raw connections
9
+ sys.path.append(str(Path(__file__).parent.parent.parent))
10
+
11
+ def create_source_connection():
12
+ """Connect to local MySQL database"""
13
+ try:
14
+ config = {
15
+ 'host': 'localhost',
16
+ 'user': 'root',
17
+ 'password': '1234',
18
+ 'database': 'gotti'
19
+ }
20
+ conn = mysql.connector.connect(**config)
21
+ print("✅ Connected to source (local) database")
22
+ return conn
23
+ except Error as e:
24
+ print(f"❌ Error connecting to source database: {e}")
25
+ return None
26
+
27
+ def create_dest_connection():
28
+ """Connect to destination TiDB database"""
29
+ try:
30
+ load_dotenv(override=True)
31
+
32
+ config = {
33
+ 'host': os.getenv('DB_HOST'),
34
+ 'user': os.getenv('DB_USERNAME'),
35
+ 'password': os.getenv('DB_PASSWORD'),
36
+ 'database': os.getenv('DB_DATABASE'),
37
+ 'port': int(os.getenv('DB_PORT', 4000))
38
+ }
39
+
40
+ ssl_ca = os.getenv('DB_SSL_CA')
41
+ if ssl_ca:
42
+ project_root = Path(__file__).parent.parent.parent
43
+ ssl_ca_path = project_root / ssl_ca
44
+ if ssl_ca_path.exists():
45
+ config['ssl_ca'] = str(ssl_ca_path)
46
+ config['ssl_verify_cert'] = True
47
+ config['ssl_verify_identity'] = True
48
+
49
+ conn = mysql.connector.connect(**config)
50
+ print("✅ Connected to destination (TiDB) database")
51
+ return conn
52
+ except Error as e:
53
+ print(f"❌ Error connecting to destination database: {e}")
54
+ return None
55
+
56
+ def migrate_table(source_conn, dest_conn, table_name):
57
+ """Migrate a single table from source to destination"""
58
+ print(f"\nMigrating table: {table_name}...")
59
+
60
+ try:
61
+ source_cursor = source_conn.cursor(dictionary=True)
62
+ dest_cursor = dest_conn.cursor()
63
+
64
+ # Check if table exists in source
65
+ try:
66
+ source_cursor.execute(f"SELECT * FROM {table_name}")
67
+ rows = source_cursor.fetchall()
68
+ except Error as e:
69
+ print(f"⚠️ Skipping {table_name}: {e}")
70
+ return
71
+
72
+ if not rows:
73
+ print(f"ℹ️ Table {table_name} is empty.")
74
+ return
75
+
76
+ print(f"Found {len(rows)} rows in {table_name}.")
77
+
78
+ # Get column names
79
+ columns = list(rows[0].keys())
80
+ placeholders = ', '.join(['%s'] * len(columns))
81
+ columns_str = ', '.join(columns)
82
+
83
+ insert_query = f"""
84
+ INSERT INTO {table_name} ({columns_str})
85
+ VALUES ({placeholders})
86
+ ON DUPLICATE KEY UPDATE
87
+ {', '.join([f"{col}=VALUES({col})" for col in columns])}
88
+ """
89
+
90
+ # Batch insert
91
+ batch_size = 100
92
+ for i in range(0, len(rows), batch_size):
93
+ batch = rows[i:i+batch_size]
94
+ values = [tuple(row[col] for col in columns) for row in batch]
95
+ try:
96
+ dest_cursor.executemany(insert_query, values)
97
+ dest_conn.commit()
98
+ print(f" Migrated {min(i+batch_size, len(rows))}/{len(rows)} rows...")
99
+ except Error as e:
100
+ print(f"❌ Error inserting batch: {e}")
101
+
102
+ print(f"✅ Successfully migrated {table_name}")
103
+
104
+ except Error as e:
105
+ print(f"❌ Error migrating {table_name}: {e}")
106
+ finally:
107
+ if 'source_cursor' in locals():
108
+ source_cursor.close()
109
+ if 'dest_cursor' in locals():
110
+ dest_cursor.close()
111
+
112
+ def main():
113
+ print("🚀 Starting migration from Local MySQL to TiDB Cloud...")
114
+
115
+ source_conn = create_source_connection()
116
+ dest_conn = create_dest_connection()
117
+
118
+ if not source_conn or not dest_conn:
119
+ print("❌ Could not establish both connections. Aborting.")
120
+ return
121
+
122
+ tables = ['available_tickers', 'calendar', 'news', 'fundamental_analysis', 'signals']
123
+
124
+ for table in tables:
125
+ migrate_table(source_conn, dest_conn, table)
126
+
127
+ source_conn.close()
128
+ dest_conn.close()
129
+ print("\n✨ Migration completed!")
130
+
131
+ if __name__ == "__main__":
132
+ main()
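Before running the migration, the destination credentials must be present; a small preflight sketch using the same variable names as `create_dest_connection()` (the values come from your .env):

```python
import os
from dotenv import load_dotenv

load_dotenv(override=True)
missing = [v for v in ("DB_HOST", "DB_USERNAME", "DB_PASSWORD",
                       "DB_DATABASE", "DB_PORT")
           if not os.getenv(v)]
print("missing env vars:", missing or "none")
```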
src/db/verify_migration.py ADDED
@@ -0,0 +1,22 @@
1
+ import sys
2
+ from pathlib import Path
3
+ sys.path.append(str(Path(__file__).parent.parent.parent))
4
+ from src.db.local_database import LocalDatabase
5
+
6
+ def verify():
7
+ print("🔍 Verifying data access via LocalDatabase (TiDB)...")
8
+ db = LocalDatabase()
9
+
10
+ stats = db.get_stats()
11
+ print("\nDatabase Statistics:")
12
+ print(f"Total Entries: {stats.get('total_entries')}")
13
+ print(f"By Type: {stats.get('by_type')}")
14
+ print(f"Top Tickers: {stats.get('top_tickers')}")
15
+
16
+ if stats.get('total_entries', 0) > 0:
17
+ print("\n✅ Verification Successful: Data is accessible!")
18
+ else:
19
+ print("\n❌ Verification Failed: No data found.")
20
+
21
+ if __name__ == "__main__":
22
+ verify()
src/fundamental_analysis/calculator.py ADDED
@@ -0,0 +1,610 @@
1
+ """
2
+ Stock Metrics Calculator
3
+ Combines all individual metric functions and fetches data from yfinance
4
+ Returns comprehensive DataFrame with all calculated metrics
5
+ """
6
+
7
+ import yfinance as yf
8
+ import pandas as pd
9
+ import numpy as np
10
+ from typing import Dict, List, Optional, Tuple
11
+ from datetime import datetime
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
+
15
+ # Import all metric calculation functions
16
+ from fundamental_analysis.metrics import *
17
+
18
+
19
+ class StockMetricsCalculator:
20
+ """Calculate comprehensive metrics for a single stock"""
21
+
22
+ def __init__(self, ticker: str):
23
+ """
24
+ Initialize calculator for a ticker
25
+
26
+ Args:
27
+ ticker: Stock ticker symbol (e.g., 'AAPL')
28
+ """
29
+ self.ticker = ticker.upper()
30
+ self.stock = yf.Ticker(self.ticker)
31
+ self.data_fetched = False
32
+ self.missing_metrics = []
33
+
34
+ # Raw data containers
35
+ self.info = {}
36
+ self.financials = {}
37
+ self.balance_sheet = {}
38
+ self.cashflow = {}
39
+ self.quarterly_financials = {}
40
+ self.quarterly_balance = {}
41
+ self.quarterly_cashflow = {}
42
+
43
+ def fetch_data(self) -> bool:
44
+ """
45
+ Fetch all available data from yfinance
46
+
47
+ Returns:
48
+ True if successful, False otherwise
49
+ """
50
+ try:
51
+ # Get company info
52
+ self.info = self.stock.info
53
+
54
+ # Get financial statements
55
+ self.financials = self.stock.financials
56
+ self.balance_sheet = self.stock.balance_sheet
57
+ self.cashflow = self.stock.cashflow
58
+
59
+ # Get quarterly statements
60
+ self.quarterly_financials = self.stock.quarterly_financials
61
+ self.quarterly_balance = self.stock.quarterly_balance_sheet
62
+ self.quarterly_cashflow = self.stock.quarterly_cashflow
63
+
64
+ self.data_fetched = True
65
+ print(f"✓ Data fetched successfully for {self.ticker}")
66
+ return True
67
+
68
+ except Exception as e:
69
+ print(f"✗ Error fetching data for {self.ticker}: {str(e)}")
70
+ return False
71
+
72
+ def _get_from_statement(self, statement: pd.DataFrame, key: str, period: int = 0) -> Optional[float]:
73
+ """
74
+ Safely get value from financial statement
75
+
76
+ Args:
77
+ statement: DataFrame from yfinance
78
+ key: Row name to extract
79
+ period: Column index (0 = most recent)
80
+
81
+ Returns:
82
+ Value or None if not found
83
+ """
84
+ try:
85
+ if statement.empty:
86
+ return None
87
+ if key in statement.index:
88
+ values = statement.loc[key]
89
+ if not values.empty and period < len(values):
90
+ val = values.iloc[period]
91
+ return float(val) if pd.notna(val) else None
92
+ return None
93
+ except:
94
+ return None
95
+
96
+ def _calculate_ttm(self, quarterly_statement: pd.DataFrame, key: str) -> Optional[float]:
97
+ """Calculate TTM (Trailing Twelve Months) from quarterly data"""
98
+ try:
99
+ if quarterly_statement.empty or key not in quarterly_statement.index:
100
+ return None
101
+ values = quarterly_statement.loc[key].iloc[:4] # Last 4 quarters
102
+ values = values.replace({"-": None})
103
+ values = values.dropna()
104
+ if len(values) == 4 and values.notna().all():
105
+ return float(values.sum())
106
+ return None
107
+ except:
108
+ return None
109
+
110
+ def calculate_all_metrics(self) -> pd.DataFrame:
111
+ """
112
+ Calculate all available metrics
113
+
114
+ Returns:
115
+ DataFrame with metric names, values, formulas, and status
116
+ """
117
+ if not self.data_fetched:
118
+ self.fetch_data()
119
+
120
+ metrics_data = []
121
+
122
+ # ============================================================================
123
+ # EXTRACT RAW DATA
124
+ # ============================================================================
125
+
126
+ print("\nExtracting raw financial data...")
127
+
128
+ # Price and shares
129
+ price = self.info.get('currentPrice') or self.info.get('regularMarketPrice')
130
+ diluted_shares = self.info.get('sharesOutstanding')
131
+
132
+ # Income statement (use TTM when available)
133
+ revenue = self._calculate_ttm(self.quarterly_financials, 'Total Revenue') or \
134
+ self._get_from_statement(self.financials, 'Total Revenue')
135
+ revenue_prior = self._get_from_statement(self.financials, 'Total Revenue', 1)
136
+
137
+ cogs = self._calculate_ttm(self.quarterly_financials, 'Cost Of Revenue') or \
138
+ self._get_from_statement(self.financials, 'Cost Of Revenue')
139
+
140
+ gross_profit = self._calculate_ttm(self.quarterly_financials, 'Gross Profit') or \
141
+ self._get_from_statement(self.financials, 'Gross Profit')
142
+
143
+ ebit = self._calculate_ttm(self.quarterly_financials, 'EBIT') or \
144
+ self._get_from_statement(self.financials, 'EBIT')
145
+
146
+ ebitda = self.info.get('ebitda') or \
147
+ self._calculate_ttm(self.quarterly_financials, 'EBITDA') or \
148
+ self._get_from_statement(self.financials, 'EBITDA')
149
+
150
+ net_income = self._calculate_ttm(self.quarterly_financials, 'Net Income') or \
151
+ self._get_from_statement(self.financials, 'Net Income')
152
+ net_income_prior = self._get_from_statement(self.financials, 'Net Income', 1)
153
+
154
+ interest_expense = abs(self._calculate_ttm(self.quarterly_financials, 'Interest Expense') or \
155
+ self._get_from_statement(self.financials, 'Interest Expense') or 0)
156
+
157
+ # EPS
158
+ eps_ttm = self.info.get('trailingEps')
159
+ eps_forward = self.info.get('forwardEps')
160
+ eps_prior = self.info.get('trailingEps') # Would need historical data for accurate prior
161
+
162
+ # Balance sheet
163
+ total_assets = self._get_from_statement(self.balance_sheet, 'Total Assets')
164
+ current_assets = self._get_from_statement(self.balance_sheet, 'Current Assets')
165
+ current_liabilities = self._get_from_statement(self.balance_sheet, 'Current Liabilities')
166
+
167
+ total_debt = self.info.get('totalDebt') or \
168
+ (self._get_from_statement(self.balance_sheet, 'Long Term Debt') or 0) + \
169
+ (self._get_from_statement(self.balance_sheet, 'Short Term Debt') or 0)
170
+
171
+ cash = self._get_from_statement(self.balance_sheet, 'Cash And Cash Equivalents') or 0
172
+ cash_and_st_investments = self._get_from_statement(self.balance_sheet, 'Cash Cash Equivalents And Short Term Investments') or cash
173
+
174
+ total_equity = self._get_from_statement(self.balance_sheet, 'Total Equity Gross Minority Interest') or \
175
+ self._get_from_statement(self.balance_sheet, 'Stockholders Equity')
176
+ total_equity_prior = self._get_from_statement(self.balance_sheet, 'Stockholders Equity', 1)
177
+
178
+ receivables = self._get_from_statement(self.balance_sheet, 'Receivables') or 0
179
+ inventory = self._get_from_statement(self.balance_sheet, 'Inventory') or 0
180
+
181
+ book_value_per_share = self.info.get('bookValue')
182
+
183
+ # Cash flow
184
+ cfo = self._calculate_ttm(self.quarterly_cashflow, 'Operating Cash Flow') or \
185
+ self._get_from_statement(self.cashflow, 'Operating Cash Flow')
186
+
187
+ capex = abs(self._calculate_ttm(self.quarterly_cashflow, 'Capital Expenditure') or \
188
+ self._get_from_statement(self.cashflow, 'Capital Expenditure') or 0)
189
+
190
+ dividends_paid = abs(self._get_from_statement(self.cashflow, 'Cash Dividends Paid') or 0)
191
+ stock_repurchased = abs(self._get_from_statement(self.cashflow, 'Repurchase Of Capital Stock') or 0)
192
+
193
+ # Tax rate
194
+ tax_rate = self.info.get('effectiveTaxRate') or 0.21 # Default to 21% if not available
195
+
196
+ # Growth rates
197
+ earnings_growth = self.info.get('earningsGrowth') or 0
198
+ revenue_growth_rate = self.info.get('revenueGrowth') or 0
199
+
200
+ # ============================================================================
201
+ # CALCULATE DERIVED VALUES
202
+ # ============================================================================
203
+
204
+ # Market cap and EV
205
+ market_cap = calculate_market_cap(price, diluted_shares) if (price and diluted_shares) else self.info.get('marketCap')
206
+ enterprise_value = calculate_enterprise_value(market_cap, total_debt, cash) if market_cap else self.info.get('enterpriseValue')
207
+
208
+ # Free cash flow
209
+ free_cash_flow = calculate_free_cash_flow(cfo, capex) if cfo else None
210
+
211
+ # Averages for ratio calculations
212
+ avg_equity = calculate_average(total_equity, total_equity_prior) if (total_equity and total_equity_prior) else total_equity
213
+
214
+ # Invested capital
215
+ invested_capital = calculate_invested_capital(total_equity, total_debt, cash) if (total_equity and total_debt) else None
216
+
217
+ # ============================================================================
218
+ # CALCULATE ALL METRICS
219
+ # ============================================================================
220
+
221
+ print("Calculating metrics...")
222
+
223
+ # --- 1. VALUATION METRICS ---
224
+ metrics_data.append({
225
+ 'Category': 'Valuation',
226
+ 'Metric': 'Market Capitalization',
227
+ 'Value': market_cap,
228
+ 'Formula': 'Price × Diluted Shares',
229
+ 'Status': 'Available' if market_cap else 'Missing'
230
+ })
231
+
232
+ metrics_data.append({
233
+ 'Category': 'Valuation',
234
+ 'Metric': 'Enterprise Value (EV)',
235
+ 'Value': enterprise_value,
236
+ 'Formula': 'Market Cap + Total Debt - Cash',
237
+ 'Status': 'Available' if enterprise_value else 'Missing'
238
+ })
239
+
240
+ pe_ratio = calculate_pe_ratio(price, eps_ttm)
241
+ metrics_data.append({
242
+ 'Category': 'Valuation',
243
+ 'Metric': 'P/E Ratio (TTM)',
244
+ 'Value': pe_ratio,
245
+ 'Formula': 'Price / EPS',
246
+ 'Status': 'Available' if pe_ratio else 'Missing',
247
+ 'Threshold': '< sector median = undervalued'
248
+ })
249
+
250
+ pe_forward = calculate_pe_ratio(price, eps_forward)
251
+ metrics_data.append({
252
+ 'Category': 'Valuation',
253
+ 'Metric': 'P/E Ratio (Forward)',
254
+ 'Value': pe_forward,
255
+ 'Formula': 'Price / Forward EPS',
256
+ 'Status': 'Available' if pe_forward else 'Missing',
257
+ 'Threshold': 'Use for valuation comparisons'
258
+ })
259
+
260
+ peg_ratio = calculate_peg_ratio(pe_forward or pe_ratio, earnings_growth)
261
+ metrics_data.append({
262
+ 'Category': 'Valuation',
263
+ 'Metric': 'PEG Ratio',
264
+ 'Value': peg_ratio,
265
+ 'Formula': 'P/E / (EPS Growth % × 100)',
266
+ 'Status': 'Available' if peg_ratio else 'Missing',
267
+ 'Threshold': '< 0.8 = BUY, 0.8-1.2 = HOLD, > 1.5 = SELL'
268
+ })
269
+
270
+ ev_ebitda = calculate_ev_ebitda(enterprise_value, ebitda)
271
+ metrics_data.append({
272
+ 'Category': 'Valuation',
273
+ 'Metric': 'EV/EBITDA',
274
+ 'Value': ev_ebitda,
275
+ 'Formula': 'Enterprise Value / EBITDA',
276
+ 'Status': 'Available' if ev_ebitda else 'Missing',
277
+ 'Threshold': 'Compare to sector median'
278
+ })
279
+
280
+ price_to_fcf = calculate_price_to_fcf(market_cap, free_cash_flow)
281
+ metrics_data.append({
282
+ 'Category': 'Valuation',
283
+ 'Metric': 'Price / FCF',
284
+ 'Value': price_to_fcf,
285
+ 'Formula': 'Market Cap / Free Cash Flow',
286
+ 'Status': 'Available' if price_to_fcf else 'Missing'
287
+ })
288
+
289
+ fcf_yield_eq = calculate_fcf_yield_equity(free_cash_flow, market_cap)
290
+ metrics_data.append({
291
+ 'Category': 'Valuation',
292
+ 'Metric': 'FCF Yield (Equity) %',
293
+ 'Value': fcf_yield_eq,
294
+ 'Formula': '(FCF / Market Cap) × 100',
295
+ 'Status': 'Available' if fcf_yield_eq else 'Missing',
296
+ 'Threshold': '> 6% = BUY, 4-6% = HOLD, < 3% = SELL',
297
+ 'Priority': 'HIGHEST'
298
+ })
299
+
300
+ fcf_yield_ev = calculate_fcf_yield_enterprise(free_cash_flow, enterprise_value)
301
+ metrics_data.append({
302
+ 'Category': 'Valuation',
303
+ 'Metric': 'FCF Yield (Enterprise) %',
304
+ 'Value': fcf_yield_ev,
305
+ 'Formula': '(FCF / EV) × 100',
306
+ 'Status': 'Available' if fcf_yield_ev else 'Missing',
307
+ 'Threshold': '> 6% = BUY (preferred metric)',
308
+ 'Priority': 'HIGHEST'
309
+ })
310
+
311
+ pb_ratio = calculate_price_to_book(price, book_value_per_share)
312
+ metrics_data.append({
313
+ 'Category': 'Valuation',
314
+ 'Metric': 'Price / Book',
315
+ 'Value': pb_ratio,
316
+ 'Formula': 'Price / Book Value per Share',
317
+ 'Status': 'Available' if pb_ratio else 'Missing'
318
+ })
319
+
320
+ # --- 2. PROFITABILITY & MARGINS ---
321
+ gross_margin = calculate_gross_margin(revenue, cogs)
322
+ metrics_data.append({
323
+ 'Category': 'Profitability',
324
+ 'Metric': 'Gross Margin %',
325
+ 'Value': gross_margin,
326
+ 'Formula': '((Revenue - COGS) / Revenue) × 100',
327
+ 'Status': 'Available' if gross_margin else 'Missing',
328
+ 'Threshold': '> 40% good, > 60% excellent'
329
+ })
330
+
331
+ ebitda_margin = calculate_ebitda_margin(ebitda, revenue)
332
+ metrics_data.append({
333
+ 'Category': 'Profitability',
334
+ 'Metric': 'EBITDA Margin %',
335
+ 'Value': ebitda_margin,
336
+ 'Formula': '(EBITDA / Revenue) × 100',
337
+ 'Status': 'Available' if ebitda_margin else 'Missing',
338
+ 'Threshold': '> 20% excellent'
339
+ })
340
+
341
+ ebit_margin = calculate_ebit_margin(ebit, revenue)
342
+ metrics_data.append({
343
+ 'Category': 'Profitability',
344
+ 'Metric': 'EBIT Margin %',
345
+ 'Value': ebit_margin,
346
+ 'Formula': '(EBIT / Revenue) × 100',
347
+ 'Status': 'Available' if ebit_margin else 'Missing'
348
+ })
349
+
350
+ net_margin = calculate_net_margin(net_income, revenue)
351
+ metrics_data.append({
352
+ 'Category': 'Profitability',
353
+ 'Metric': 'Net Margin %',
354
+ 'Value': net_margin,
355
+ 'Formula': '(Net Income / Revenue) × 100',
356
+ 'Status': 'Available' if net_margin else 'Missing',
357
+ 'Threshold': '> 10% good'
358
+ })
359
+
360
+ # --- 3. CASH FLOW METRICS ---
361
+ metrics_data.append({
362
+ 'Category': 'Cash Flow',
363
+ 'Metric': 'Free Cash Flow',
364
+ 'Value': free_cash_flow,
365
+ 'Formula': 'CFO - CapEx',
366
+ 'Status': 'Available' if free_cash_flow else 'Missing',
367
+ 'Threshold': 'Must be positive',
368
+ 'Priority': 'CRITICAL'
369
+ })
370
+
371
+ fcf_per_share = calculate_fcf_per_share(free_cash_flow, diluted_shares)
372
+ metrics_data.append({
373
+ 'Category': 'Cash Flow',
374
+ 'Metric': 'FCF per Share',
375
+ 'Value': fcf_per_share,
376
+ 'Formula': 'FCF / Diluted Shares',
377
+ 'Status': 'Available' if fcf_per_share else 'Missing'
378
+ })
379
+
380
+ cash_conversion = calculate_cash_conversion(cfo, net_income)
381
+ metrics_data.append({
382
+ 'Category': 'Cash Flow',
383
+ 'Metric': 'Cash Conversion Ratio',
384
+ 'Value': cash_conversion,
385
+ 'Formula': 'CFO / Net Income',
386
+ 'Status': 'Available' if cash_conversion else 'Missing',
387
+ 'Threshold': '> 1.0 = quality earnings, < 1.0 RED FLAG',
388
+ 'Priority': 'HIGH'
389
+ })
390
+
391
+ # --- 4. LIQUIDITY & SOLVENCY ---
392
+ current_ratio = calculate_current_ratio(current_assets, current_liabilities)
393
+ metrics_data.append({
394
+ 'Category': 'Liquidity',
395
+ 'Metric': 'Current Ratio',
396
+ 'Value': current_ratio,
397
+ 'Formula': 'Current Assets / Current Liabilities',
398
+ 'Status': 'Available' if current_ratio else 'Missing',
399
+ 'Threshold': '> 1.5 good'
400
+ })
401
+
402
+ quick_ratio = calculate_quick_ratio(cash, 0, receivables, current_liabilities)
403
+ metrics_data.append({
404
+ 'Category': 'Liquidity',
405
+ 'Metric': 'Quick Ratio',
406
+ 'Value': quick_ratio,
407
+ 'Formula': '(Cash + Receivables) / Current Liabilities',
408
+ 'Status': 'Available' if quick_ratio else 'Missing'
409
+ })
410
+
411
+ net_debt_ebitda = calculate_net_debt_to_ebitda(total_debt, cash, ebitda)
412
+ metrics_data.append({
413
+ 'Category': 'Solvency',
414
+ 'Metric': 'Net Debt / EBITDA',
415
+ 'Value': net_debt_ebitda,
416
+ 'Formula': '(Total Debt - Cash) / EBITDA',
417
+ 'Status': 'Available' if net_debt_ebitda else 'Missing',
418
+ 'Threshold': '< 1 = Low risk, 1-3 = Moderate, > 3 = High risk',
419
+ 'Priority': 'HIGH'
420
+ })
421
+
422
+ interest_cov = calculate_interest_coverage(ebit, interest_expense)
423
+ metrics_data.append({
424
+ 'Category': 'Solvency',
425
+ 'Metric': 'Interest Coverage',
426
+ 'Value': interest_cov,
427
+ 'Formula': 'EBIT / Interest Expense',
428
+ 'Status': 'Available' if interest_cov else 'Missing',
429
+ 'Threshold': '> 3x safe, < 2x risky'
430
+ })
431
+
432
+ debt_to_equity = calculate_debt_to_equity(total_debt, total_equity)
433
+ metrics_data.append({
434
+ 'Category': 'Solvency',
435
+ 'Metric': 'Debt / Equity',
436
+ 'Value': debt_to_equity,
437
+ 'Formula': 'Total Debt / Total Equity',
438
+ 'Status': 'Available' if debt_to_equity else 'Missing'
439
+ })
440
+
441
+ # --- 5. RETURNS & EFFICIENCY ---
442
+ roe = calculate_roe(net_income, avg_equity)
443
+ metrics_data.append({
444
+ 'Category': 'Returns',
445
+ 'Metric': 'Return on Equity (ROE) %',
446
+ 'Value': roe,
447
+ 'Formula': '(Net Income / Avg Equity) × 100',
448
+ 'Status': 'Available' if roe else 'Missing',
449
+ 'Threshold': '> 15% good, > 20% excellent',
450
+ 'Priority': 'VERY HIGH'
451
+ })
452
+
453
+ roa = calculate_roa(net_income, total_assets)
454
+ metrics_data.append({
455
+ 'Category': 'Returns',
456
+ 'Metric': 'Return on Assets (ROA) %',
457
+ 'Value': roa,
458
+ 'Formula': '(Net Income / Total Assets) × 100',
459
+ 'Status': 'Available' if roa else 'Missing'
460
+ })
461
+
462
+ roic = calculate_roic(ebit, tax_rate, invested_capital)
463
+ metrics_data.append({
464
+ 'Category': 'Returns',
465
+ 'Metric': 'Return on Invested Capital (ROIC) %',
466
+ 'Value': roic,
467
+ 'Formula': '(EBIT × (1 - Tax Rate) / Invested Capital) × 100',
468
+ 'Status': 'Available' if roic else 'Missing',
469
+ 'Threshold': '> 10% good, > 15% excellent',
470
+ 'Priority': 'VERY HIGH - Best quality indicator'
471
+ })
472
+
473
+ # --- 6. GROWTH METRICS ---
474
+ rev_growth = calculate_revenue_growth(revenue, revenue_prior)
475
+ metrics_data.append({
476
+ 'Category': 'Growth',
477
+ 'Metric': 'Revenue Growth (YoY) %',
478
+ 'Value': rev_growth or (revenue_growth_rate * 100),
479
+ 'Formula': '((Current Rev - Prior Rev) / Prior Rev) × 100',
480
+ 'Status': 'Available' if (rev_growth or revenue_growth_rate) else 'Missing',
481
+ 'Threshold': '> 10% good, > 20% excellent'
482
+ })
483
+
484
+ eps_growth_calc = calculate_eps_growth(eps_ttm, eps_prior)
485
+ metrics_data.append({
486
+ 'Category': 'Growth',
487
+ 'Metric': 'EPS Growth (YoY) %',
488
+ 'Value': eps_growth_calc or (earnings_growth * 100),
489
+ 'Formula': '((Current EPS - Prior EPS) / Prior EPS) × 100',
490
+ 'Status': 'Available' if (eps_growth_calc or earnings_growth) else 'Missing',
491
+ 'Priority': 'HIGH'
492
+ })
493
+
494
+ # --- 7. CAPITAL ALLOCATION ---
495
+ payout_ratio = calculate_payout_ratio(dividends_paid, net_income)
496
+ metrics_data.append({
497
+ 'Category': 'Capital Allocation',
498
+ 'Metric': 'Payout Ratio %',
499
+ 'Value': payout_ratio,
500
+ 'Formula': '(Dividends / Net Income) × 100',
501
+ 'Status': 'Available' if payout_ratio else 'Missing',
502
+ 'Threshold': '< 60% sustainable'
503
+ })
504
+
505
+ buyback_yield = calculate_buyback_yield(stock_repurchased, market_cap)
506
+ metrics_data.append({
507
+ 'Category': 'Capital Allocation',
508
+ 'Metric': 'Buyback Yield %',
509
+ 'Value': buyback_yield,
510
+ 'Formula': '(Buyback Cash / Market Cap) × 100',
511
+ 'Status': 'Available' if buyback_yield else 'Missing'
512
+ })
513
+
514
+ total_payout = calculate_total_payout_ratio(dividends_paid, stock_repurchased, net_income)
515
+ metrics_data.append({
516
+ 'Category': 'Capital Allocation',
517
+ 'Metric': 'Total Payout Ratio %',
518
+ 'Value': total_payout,
519
+ 'Formula': '((Dividends + Buybacks) / Net Income) × 100',
520
+ 'Status': 'Available' if total_payout else 'Missing'
521
+ })
522
+
523
+ # Create DataFrame
524
+ df = pd.DataFrame(metrics_data)
525
+
526
+ # Track missing metrics
527
+ self.missing_metrics = df[df['Status'] == 'Missing']['Metric'].tolist()
528
+
529
+ print(f"\n✓ Calculated {len(df)} metrics")
530
+ print(f"✓ Available: {len(df[df['Status'] == 'Available'])}")
531
+ print(f"✗ Missing: {len(self.missing_metrics)}")
532
+
533
+ return df
534
+
535
+ def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
536
+ """Generate summary statistics about the metrics"""
537
+ total = len(df)
538
+ available = len(df[df['Status'] == 'Available'])
539
+ missing = total - available
540
+
541
+ return {
542
+ 'ticker': self.ticker,
543
+ 'total_metrics': total,
544
+ 'available_metrics': available,
545
+ 'missing_metrics': missing,
546
+ 'coverage_percentage': (available / total) * 100 if total > 0 else 0,
547
+ 'missing_metric_list': self.missing_metrics
548
+ }
549
+
550
+
551
+ def calculate_metrics_for_ticker(ticker: str) -> Tuple[pd.DataFrame, Dict]:
552
+ """
553
+ Main function to calculate all metrics for a ticker
554
+
555
+ Args:
556
+ ticker: Stock ticker symbol
557
+
558
+ Returns:
559
+ Tuple of (metrics_dataframe, summary_statistics)
560
+ """
561
+ calculator = StockMetricsCalculator(ticker)
562
+
563
+ if not calculator.fetch_data():
564
+ return pd.DataFrame(), {}
565
+
566
+
567
+ metrics_df = calculator.calculate_all_metrics()
568
+ summary = calculator.get_summary_statistics(metrics_df)
569
+
570
+ return metrics_df, summary
571
+
572
+
573
+ if __name__ == "__main__":
574
+ # Test with a sample ticker
575
+ test_ticker = "AAPL"
576
+ print(f"Testing with {test_ticker}...")
577
+ print("=" * 80)
578
+
579
+ metrics_df, summary = calculate_metrics_for_ticker(test_ticker)
580
+
581
+ if not metrics_df.empty:
582
+ print("\n" + "=" * 80)
583
+ print("SUMMARY STATISTICS")
584
+ print("=" * 80)
585
+ for key, value in summary.items():
586
+ if key != 'missing_metric_list':
587
+ print(f"{key}: {value}")
588
+
589
+ print("\n" + "=" * 80)
590
+ print("SAMPLE METRICS (First 10)")
591
+ print("=" * 80)
592
+ print(metrics_df[['Category', 'Metric', 'Value', 'Status']].head(10).to_string(index=False))
593
+
594
+ print("\n" + "=" * 80)
595
+ print("HIGH PRIORITY METRICS")
596
+ print("=" * 80)
597
+ priority_metrics = metrics_df[metrics_df['Priority'].notna()][['Metric', 'Value', 'Threshold', 'Priority']]
598
+ print(priority_metrics.to_string(index=False))
599
+
600
+ if summary['missing_metrics'] > 0:
601
+ print("\n" + "=" * 80)
602
+ print("MISSING METRICS")
603
+ print("=" * 80)
604
+ for metric in summary['missing_metric_list']:
605
+ print(f" - {metric}")
606
+
607
+ # Save to CSV
608
+ output_file = f"{test_ticker}_metrics.csv"
609
+ metrics_df.to_csv(output_file, index=False)
610
+ print(f"\n✓ Metrics saved to {output_file}")
src/fundamental_analysis/data_fetcher.py ADDED
@@ -0,0 +1,336 @@
1
+ """
2
+ Enhanced financial data fetcher using yfinance.
3
+ Collects company data, sector data, and peer comparison data.
4
+ """
5
+
6
+ import yfinance as yf
7
+ import pandas as pd
8
+ import numpy as np
9
+ from typing import Dict, List, Optional, Tuple
10
+ from datetime import datetime
11
+ import warnings
12
+ warnings.filterwarnings('ignore')
13
+
14
+
15
+ class FinancialDataFetcher:
16
+ """Comprehensive data fetcher for fundamental analysis"""
17
+
18
+ def __init__(self, ticker: str):
19
+ """
20
+ Initialize fetcher for a ticker
21
+
22
+ Args:
23
+ ticker: Stock ticker symbol
24
+ """
25
+ self.ticker = ticker.upper()
26
+ self.stock = yf.Ticker(self.ticker)
27
+ self.info = None
28
+ self.sector = None
29
+ self.industry = None
30
+
31
+ def fetch_company_info(self) -> Dict:
32
+ """Fetch basic company information"""
33
+ try:
34
+ self.info = self.stock.info
35
+ self.sector = self.info.get('sector', 'Unknown')
36
+ self.industry = self.info.get('industry', 'Unknown')
37
+
38
+ return {
39
+ 'ticker': self.ticker,
40
+ 'company_name': self.info.get('longName', self.ticker),
41
+ 'sector': self.sector,
42
+ 'industry': self.industry,
43
+ 'market_cap': self.info.get('marketCap', 0),
44
+ 'country': self.info.get('country', 'Unknown'),
45
+ 'website': self.info.get('website', ''),
46
+ 'business_summary': self.info.get('longBusinessSummary', '')
47
+ }
48
+ except Exception as e:
49
+ print(f"Error fetching company info: {e}")
50
+ return {}
51
+
52
+ def fetch_financial_statements(self) -> Dict[str, pd.DataFrame]:
53
+ """Fetch all financial statements (annual)"""
54
+ try:
55
+ return {
56
+ 'income_statement': self.stock.income_stmt,
57
+ 'balance_sheet': self.stock.balance_sheet,
58
+ 'cash_flow': self.stock.cashflow,
59
+ 'quarterly_income': self.stock.quarterly_income_stmt,
60
+ 'quarterly_balance': self.stock.quarterly_balance_sheet,
61
+ 'quarterly_cashflow': self.stock.quarterly_cashflow
62
+ }
63
+ except Exception as e:
64
+ print(f"Error fetching financial statements: {e}")
65
+ return {}
66
+
67
+ def fetch_key_metrics(self) -> Dict:
68
+ """Fetch key financial metrics and ratios"""
69
+ try:
70
+ info = self.info if self.info else self.stock.info
71
+
72
+ return {
73
+ # Price metrics
74
+ 'current_price': info.get('currentPrice', info.get('regularMarketPrice', 0)),
75
+ 'previous_close': info.get('previousClose', 0),
76
+ '52_week_high': info.get('fiftyTwoWeekHigh', 0),
77
+ '52_week_low': info.get('fiftyTwoWeekLow', 0),
78
+
79
+ # Valuation metrics
80
+ 'market_cap': info.get('marketCap', 0),
81
+ 'enterprise_value': info.get('enterpriseValue', 0),
82
+ 'trailing_pe': info.get('trailingPE'),
83
+ 'forward_pe': info.get('forwardPE'),
84
+ 'peg_ratio': info.get('pegRatio'),
85
+ 'price_to_book': info.get('priceToBook'),
86
+ 'price_to_sales': info.get('priceToSalesTrailing12Months'),
87
+ 'ev_to_revenue': info.get('enterpriseToRevenue'),
88
+ 'ev_to_ebitda': info.get('enterpriseToEbitda'),
89
+
90
+ # Profitability metrics
91
+ 'profit_margin': info.get('profitMargins'),
92
+ 'operating_margin': info.get('operatingMargins'),
93
+ 'gross_margin': info.get('grossMargins'),
94
+ 'ebitda_margin': self._calculate_ebitda_margin(info),
95
+
96
+ # Returns
97
+ 'return_on_assets': info.get('returnOnAssets'),
98
+ 'return_on_equity': info.get('returnOnEquity'),
99
+
100
+ # Growth
101
+ 'revenue_growth': info.get('revenueGrowth'),
102
+ 'earnings_growth': info.get('earningsGrowth'),
103
+
104
+ # Financial health
105
+ 'total_cash': info.get('totalCash', 0),
106
+ 'total_debt': info.get('totalDebt', 0),
107
+ 'debt_to_equity': info.get('debtToEquity'),
108
+ 'current_ratio': info.get('currentRatio'),
109
+ 'quick_ratio': info.get('quickRatio'),
110
+
111
+ # Cash flow
112
+ 'operating_cash_flow': info.get('operatingCashflow', 0),
113
+ 'free_cash_flow': info.get('freeCashflow', 0),
114
+
115
+ # Per share
116
+ 'book_value_per_share': info.get('bookValue'),
117
+ 'revenue_per_share': info.get('revenuePerShare'),
118
+ 'eps_trailing': info.get('trailingEps'),
119
+ 'eps_forward': info.get('forwardEps'),
120
+
121
+ # Shares
122
+ 'shares_outstanding': info.get('sharesOutstanding', 0),
123
+
124
+ # Other
125
+ 'beta': info.get('beta'),
126
+ 'dividend_yield': info.get('dividendYield'),
127
+ }
128
+ except Exception as e:
129
+ print(f"Error fetching key metrics: {e}")
130
+ return {}
131
+
132
+ def _calculate_ebitda_margin(self, info: Dict) -> Optional[float]:
133
+ """Calculate EBITDA margin if available"""
134
+ try:
135
+ ebitda = info.get('ebitda')
136
+ revenue = info.get('totalRevenue')
137
+ if ebitda and revenue and revenue > 0:
138
+ return ebitda / revenue
139
+ except Exception:
140
+ pass
141
+ return None
142
+
143
+ def fetch_peer_tickers(self, max_peers: int = 10) -> List[str]:
144
+ """
145
+ Fetch peer company tickers in the same sector/industry
146
+
147
+ Args:
148
+ max_peers: Maximum number of peers to return
149
+
150
+ Returns:
151
+ List of peer ticker symbols
152
+ """
153
+ try:
154
+ if not self.sector or self.sector == 'Unknown':
155
+ self.fetch_company_info()
156
+
157
+ # yfinance exposes no reliable peer list; stock.recommendations only
+ # occasionally hints at peers, so no lookup is attempted here.
160
+
161
+ # Fallback: Use a simple sector-based approach
162
+ # In production, you'd use a proper database or API for peer identification
163
+ # For now, we'll return an empty list and let users provide peers manually
164
+
165
+ return []
166
+
167
+ except Exception as e:
168
+ print(f"Error fetching peer tickers: {e}")
169
+ return []
170
+
171
+ def fetch_peer_data(self, peer_tickers: List[str]) -> Dict[str, Dict]:
172
+ """
173
+ Fetch key metrics for peer companies
174
+
175
+ Args:
176
+ peer_tickers: List of peer ticker symbols
177
+
178
+ Returns:
179
+ Dictionary mapping ticker to metrics
180
+ """
181
+ peer_data = {}
182
+
183
+ for ticker in peer_tickers:
184
+ try:
185
+ print(f"Fetching data for peer: {ticker}")
186
+ peer_stock = yf.Ticker(ticker)
187
+ peer_info = peer_stock.info
188
+
189
+ peer_data[ticker] = {
190
+ 'company_name': peer_info.get('longName', ticker),
191
+ 'market_cap': peer_info.get('marketCap', 0),
192
+ 'trailing_pe': peer_info.get('trailingPE'),
193
+ 'forward_pe': peer_info.get('forwardPE'),
194
+ 'peg_ratio': peer_info.get('pegRatio'),
195
+ 'price_to_book': peer_info.get('priceToBook'),
196
+ 'profit_margin': peer_info.get('profitMargins'),
197
+ 'operating_margin': peer_info.get('operatingMargins'),
198
+ 'gross_margin': peer_info.get('grossMargins'),
199
+ 'return_on_equity': peer_info.get('returnOnEquity'),
200
+ 'return_on_assets': peer_info.get('returnOnAssets'),
201
+ 'revenue_growth': peer_info.get('revenueGrowth'),
202
+ 'earnings_growth': peer_info.get('earningsGrowth'),
203
+ 'debt_to_equity': peer_info.get('debtToEquity'),
204
+ 'current_ratio': peer_info.get('currentRatio'),
205
+ 'free_cash_flow': peer_info.get('freeCashflow', 0),
206
+ 'beta': peer_info.get('beta'),
207
+ }
208
+ except Exception as e:
209
+ print(f"Error fetching data for {ticker}: {e}")
210
+ continue
211
+
212
+ return peer_data
213
+
214
+ def calculate_sector_metrics(self, peer_data: Dict[str, Dict]) -> Dict:
215
+ """
216
+ Calculate sector-wide metrics from peer data
217
+
218
+ Args:
219
+ peer_data: Dictionary of peer metrics
220
+
221
+ Returns:
222
+ Dictionary of sector averages/medians
223
+ """
224
+ if not peer_data:
225
+ return {}
226
+
227
+ # Collect all metrics
228
+ metrics = {
229
+ 'trailing_pe': [],
230
+ 'forward_pe': [],
231
+ 'peg_ratio': [],
232
+ 'price_to_book': [],
233
+ 'profit_margin': [],
234
+ 'operating_margin': [],
235
+ 'gross_margin': [],
236
+ 'return_on_equity': [],
237
+ 'return_on_assets': [],
238
+ 'revenue_growth': [],
239
+ 'earnings_growth': [],
240
+ 'debt_to_equity': [],
241
+ 'current_ratio': [],
242
+ 'beta': []
243
+ }
244
+
245
+ # Aggregate peer metrics
246
+ for ticker, data in peer_data.items():
247
+ for key in metrics.keys():
248
+ value = data.get(key)
249
+ if value is not None and not (isinstance(value, float) and np.isnan(value)):
250
+ metrics[key].append(value)
251
+
252
+ # Calculate sector statistics
253
+ sector_metrics = {}
254
+ for key, values in metrics.items():
255
+ if values:
256
+ sector_metrics[f'{key}_median'] = float(np.median(values))
257
+ sector_metrics[f'{key}_mean'] = float(np.mean(values))
258
+ sector_metrics[f'{key}_min'] = float(np.min(values))
259
+ sector_metrics[f'{key}_max'] = float(np.max(values))
260
+ sector_metrics[f'{key}_count'] = len(values)
261
+
262
+ return sector_metrics
263
+
264
+ def fetch_all_data(self, peer_tickers: Optional[List[str]] = None) -> Dict:
265
+ """
266
+ Fetch all data for comprehensive analysis
267
+
268
+ Args:
269
+ peer_tickers: Optional list of peer tickers for comparison
270
+
271
+ Returns:
272
+ Complete dataset
273
+ """
274
+ print(f"\n{'='*60}")
275
+ print(f"Fetching data for {self.ticker}...")
276
+ print(f"{'='*60}\n")
277
+
278
+ # Company data
279
+ company_info = self.fetch_company_info()
280
+ print(f"✓ Company: {company_info.get('company_name', self.ticker)}")
281
+ print(f"✓ Sector: {company_info.get('sector', 'Unknown')}")
282
+ print(f"✓ Industry: {company_info.get('industry', 'Unknown')}\n")
283
+
284
+ # Financial statements
285
+ print("Fetching financial statements...")
286
+ statements = self.fetch_financial_statements()
287
+
288
+ # Key metrics
289
+ print("Fetching key metrics...")
290
+ metrics = self.fetch_key_metrics()
291
+
292
+ # Peer data
293
+ peer_data = {}
294
+ sector_metrics = {}
295
+
296
+ if peer_tickers:
297
+ print(f"\nFetching peer data for {len(peer_tickers)} companies...")
298
+ peer_data = self.fetch_peer_data(peer_tickers)
299
+ print(f"✓ Successfully fetched data for {len(peer_data)} peers")
300
+
301
+ if peer_data:
302
+ print("Calculating sector metrics...")
303
+ sector_metrics = self.calculate_sector_metrics(peer_data)
304
+ print(f"✓ Sector metrics calculated\n")
305
+
306
+ return {
307
+ 'ticker': self.ticker,
308
+ 'fetch_date': datetime.now().isoformat(),
309
+ 'company_info': company_info,
310
+ 'financial_statements': statements,
311
+ 'metrics': metrics,
312
+ 'peer_data': peer_data,
313
+ 'sector_metrics': sector_metrics
314
+ }
315
+
316
+
317
+ if __name__ == "__main__":
318
+ # Example usage
319
+ ticker = input("Enter ticker symbol: ").upper()
320
+
321
+ fetcher = FinancialDataFetcher(ticker)
322
+
323
+ # Ask for peer tickers
324
+ peers_input = input("Enter peer tickers (comma-separated, or press Enter to skip): ").strip()
325
+ peer_tickers = [p.strip().upper() for p in peers_input.split(',')] if peers_input else None
326
+
327
+ data = fetcher.fetch_all_data(peer_tickers)
328
+
329
+ print(f"\n{'='*60}")
330
+ print("DATA COLLECTION COMPLETE")
331
+ print(f"{'='*60}")
332
+ print(f"Company: {data['company_info'].get('company_name')}")
333
+ print(f"Metrics collected: {len([k for k, v in data['metrics'].items() if v is not None])}")
334
+ if data['peer_data']:
335
+ print(f"Peers analyzed: {len(data['peer_data'])}")
336
+ print(f"Sector metrics: {len(data['sector_metrics'])}")
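For orientation, a minimal usage sketch of the fetcher above (the import path, ticker, and peer list are illustrative assumptions, not part of this diff):

    # Hypothetical driver for FinancialDataFetcher; assumes it is run from
    # src/ so the fundamental_analysis package is importable.
    from fundamental_analysis.data_fetcher import FinancialDataFetcher

    fetcher = FinancialDataFetcher("AAPL")
    data = fetcher.fetch_all_data(peer_tickers=["MSFT", "GOOGL"])

    # calculate_sector_metrics keys its output as '<metric>_median',
    # '<metric>_mean', '<metric>_min', '<metric>_max', '<metric>_count'.
    print(data["sector_metrics"].get("trailing_pe_median"))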
src/fundamental_analysis/decision_engine.py ADDED
@@ -0,0 +1,542 @@
+ """
+ Investment decision engine.
+ Combines all analyses to generate BUY/SELL/HOLD recommendations.
+ """
+
+ import numpy as np
+ from typing import Dict, List, Tuple
+ from datetime import datetime
+
+
+ class InvestmentDecisionEngine:
+     """Generates investment recommendations based on comprehensive analysis"""
+
+     def __init__(self,
+                  financial_data: Dict,
+                  fundamental_analysis: Dict,
+                  sector_analysis: Dict,
+                  valuation_analysis: Dict):
+         """
+         Initialize decision engine
+
+         Args:
+             financial_data: Raw financial data
+             fundamental_analysis: Results from FinancialAnalyzer
+             sector_analysis: Results from SectorAnalyzer
+             valuation_analysis: Results from ValuationEngine
+         """
+         self.ticker = financial_data.get('ticker')
+         self.company_info = financial_data.get('company_info', {})
+         self.current_price = financial_data.get('metrics', {}).get('current_price', 0)
+
+         self.fundamental = fundamental_analysis
+         self.sector = sector_analysis
+         self.valuation = valuation_analysis
+
+         self.score_weights = {
+             'fundamental': 0.35,
+             'sector': 0.25,
+             'valuation': 0.40
+         }
+
+     def score_fundamentals(self) -> Dict:
+         """
+         Score company fundamentals (0-100)
+
+         Returns:
+             Fundamental scores and assessment
+         """
+         scores = {}
+
+         # Growth score (0-25)
+         growth = self.fundamental.get('growth_analysis', {})
+         growth_quality = growth.get('growth_quality', 'Insufficient Data')
+         growth_scores = {
+             'Strong': 25, 'Good': 18, 'Moderate': 12,
+             'Weak': 5, 'Insufficient Data': 10
+         }
+         scores['growth'] = growth_scores.get(growth_quality, 10)
+
+         # Margin score (0-25)
+         margins = self.fundamental.get('margin_analysis', {})
+         margin_quality = margins.get('margin_quality', 'Insufficient Data')
+         margin_scores = {
+             'Excellent': 25, 'Good': 18, 'Moderate': 12,
+             'Weak': 5, 'Insufficient Data': 10
+         }
+         scores['margins'] = margin_scores.get(margin_quality, 10)
+
+         # Returns score (0-25)
+         returns = self.fundamental.get('returns_analysis', {})
+         returns_quality = returns.get('returns_quality', 'Insufficient Data')
+         returns_scores = {
+             'Excellent': 25, 'Good': 18, 'Moderate': 12,
+             'Weak': 5, 'Insufficient Data': 10
+         }
+         scores['returns'] = returns_scores.get(returns_quality, 10)
+
+         # Cash flow score (0-25)
+         cash_flow = self.fundamental.get('cash_flow_analysis', {})
+         cf_quality = cash_flow.get('cash_flow_quality', 'Insufficient Data')
+         cf_scores = {
+             'Excellent': 25, 'Good': 18, 'Moderate': 12,
+             'Weak': 5, 'Negative': 0, 'Insufficient Data': 10
+         }
+         scores['cash_flow'] = cf_scores.get(cf_quality, 10)
+
+         total_score = sum(scores.values())
+
+         return {
+             'scores': scores,
+             'total': total_score,
+             'max': 100,
+             'percentage': total_score,
+             'grade': self._get_grade(total_score),
+             'assessment': self._assess_fundamentals(total_score)
+         }
+
+     def score_sector_position(self) -> Dict:
+         """
+         Score company's position within sector (0-100)
+
+         Returns:
+             Sector position scores and assessment
+         """
+         scores = {}
+
+         # Sector sentiment (0-30)
+         sentiment = self.sector.get('sector_sentiment', {})
+         overall_sentiment = sentiment.get('overall_sentiment', 'Neutral')
+
+         if 'Positive' in overall_sentiment or 'tailwinds' in overall_sentiment:
+             scores['sector_sentiment'] = 30
+         elif 'Negative' in overall_sentiment or 'headwinds' in overall_sentiment:
+             scores['sector_sentiment'] = 10
+         else:
+             scores['sector_sentiment'] = 20
+
+         # Competitive position (0-35)
+         ranking = self.sector.get('sector_ranking', {})
+         overall_position = ranking.get('overall_position', 'Unknown')
+
+         position_scores = {
+             'Top 20%': 35, 'Top 40%': 25, 'Middle': 18,
+             'Bottom 40%': 10, 'Bottom 20%': 5, 'Unknown': 18
+         }
+         scores['competitive_position'] = position_scores.get(overall_position, 18)
+
+         # Relative profitability (0-20)
+         profitability_comp = self.sector.get('profitability_comparison', {})
+         prof_vs_sector = profitability_comp.get('profitability_vs_sector', 'Unknown')
+
+         prof_scores = {
+             'Top Performer': 20, 'Above Average': 15,
+             'Below Average': 8, 'Bottom Performer': 3, 'Unknown': 10
+         }
+         scores['relative_profitability'] = prof_scores.get(prof_vs_sector, 10)
+
+         # Relative growth (0-15)
+         growth_comp = self.sector.get('growth_comparison', {})
+         growth_vs_sector = growth_comp.get('growth_vs_sector', 'Unknown')
+
+         growth_scores = {
+             'Fast Grower': 15, 'Above Average Growth': 11,
+             'Below Average Growth': 6, 'Lagging Sector': 2, 'Unknown': 8
+         }
+         scores['relative_growth'] = growth_scores.get(growth_vs_sector, 8)
+
+         total_score = sum(scores.values())
+
+         return {
+             'scores': scores,
+             'total': total_score,
+             'max': 100,
+             'percentage': total_score,
+             'grade': self._get_grade(total_score),
+             'assessment': self._assess_sector_position(total_score, overall_sentiment)
+         }
+
+     def score_valuation(self) -> Dict:
+         """
+         Score valuation attractiveness (0-100)
+
+         Returns:
+             Valuation scores and assessment
+         """
+         scores = {}
+
+         # DCF valuation (0-40)
+         dcf = self.valuation.get('dcf_valuation', {})
+         dcf_upside = dcf.get('upside_percent', 0)
+
+         if 'error' not in dcf:
+             if dcf_upside > 30:
+                 scores['dcf'] = 40
+             elif dcf_upside > 15:
+                 scores['dcf'] = 30
+             elif dcf_upside > 0:
+                 scores['dcf'] = 20
+             elif dcf_upside > -15:
+                 scores['dcf'] = 10
+             else:
+                 scores['dcf'] = 0
+         else:
+             scores['dcf'] = 20  # Neutral if DCF not applicable
+
+         # Relative valuation (0-40)
+         relative = self.valuation.get('relative_valuation', {})
+         avg_upside = relative.get('average_upside', 0)
+
+         if avg_upside != 0:
+             if avg_upside > 25:
+                 scores['relative'] = 40
+             elif avg_upside > 10:
+                 scores['relative'] = 30
+             elif avg_upside > 0:
+                 scores['relative'] = 20
+             elif avg_upside > -15:
+                 scores['relative'] = 10
+             else:
+                 scores['relative'] = 0
+         else:
+             scores['relative'] = 20  # Neutral if not available
+
+         # Margin of safety (0-20)
+         mos = self.valuation.get('margin_of_safety', {})
+         dcf_mos = mos.get('dcf_margin_of_safety', {})
+         mos_percent = dcf_mos.get('margin_percent', 0)
+
+         if mos_percent > 30:
+             scores['margin_of_safety'] = 20
+         elif mos_percent > 20:
+             scores['margin_of_safety'] = 15
+         elif mos_percent > 10:
+             scores['margin_of_safety'] = 10
+         elif mos_percent > 0:
+             scores['margin_of_safety'] = 5
+         else:
+             scores['margin_of_safety'] = 0
+
+         total_score = sum(scores.values())
+
+         return {
+             'scores': scores,
+             'total': total_score,
+             'max': 100,
+             'percentage': total_score,
+             'grade': self._get_grade(total_score),
+             'assessment': self._assess_valuation(total_score)
+         }
+
+     def calculate_overall_score(self) -> Dict:
+         """
+         Calculate weighted overall investment score
+
+         Returns:
+             Overall score and breakdown
+         """
+         fundamental_score = self.score_fundamentals()
+         sector_score = self.score_sector_position()
+         valuation_score = self.score_valuation()
+
+         # Calculate weighted score
+         weighted_score = (
+             fundamental_score['percentage'] * self.score_weights['fundamental'] +
+             sector_score['percentage'] * self.score_weights['sector'] +
+             valuation_score['percentage'] * self.score_weights['valuation']
+         )
+
+         return {
+             'overall_score': weighted_score,
+             'max_score': 100,
+             'grade': self._get_grade(weighted_score),
+             'breakdown': {
+                 'fundamental': {
+                     'score': fundamental_score['percentage'],
+                     'weight': self.score_weights['fundamental'],
+                     'weighted_score': fundamental_score['percentage'] * self.score_weights['fundamental'],
+                     'details': fundamental_score
+                 },
+                 'sector': {
+                     'score': sector_score['percentage'],
+                     'weight': self.score_weights['sector'],
+                     'weighted_score': sector_score['percentage'] * self.score_weights['sector'],
+                     'details': sector_score
+                 },
+                 'valuation': {
+                     'score': valuation_score['percentage'],
+                     'weight': self.score_weights['valuation'],
+                     'weighted_score': valuation_score['percentage'] * self.score_weights['valuation'],
+                     'details': valuation_score
+                 }
+             }
+         }
+
+     def generate_recommendation(self) -> str:
+         """
+         Generate BUY/SELL/HOLD recommendation
+
+         Returns:
+             Investment recommendation
+         """
+         overall = self.calculate_overall_score()
+         score = overall['overall_score']
+
+         # Base recommendation on score
+         if score >= 70:
+             base_rec = "STRONG BUY"
+         elif score >= 60:
+             base_rec = "BUY"
+         elif score >= 50:
+             base_rec = "HOLD"
+         elif score >= 40:
+             base_rec = "SELL"
+         else:
+             base_rec = "STRONG SELL"
+
+         # Adjust based on specific red flags
+         red_flags = self.identify_red_flags()
+
+         if red_flags['critical_issues']:
+             if base_rec in ["STRONG BUY", "BUY"]:
+                 base_rec = "HOLD"
+             elif base_rec == "HOLD":
+                 base_rec = "SELL"
+
+         return base_rec
+
+     def identify_red_flags(self) -> Dict:
+         """
+         Identify critical red flags
+
+         Returns:
+             Red flags and warnings
+         """
+         red_flags = {
+             'critical_issues': [],
+             'warnings': [],
+             'positive_signs': []
+         }
+
+         # Check cash flow
+         cf_analysis = self.fundamental.get('cash_flow_analysis', {})
+         if cf_analysis.get('free_cash_flow', 0) < 0:
+             red_flags['critical_issues'].append("Negative free cash flow")
+
+         # Check earnings
+         growth = self.fundamental.get('growth_analysis', {})
+         if growth.get('net_income_growth_yoy', 0) < -0.10:
+             red_flags['warnings'].append("Declining earnings (>10% drop)")
+
+         # Check margins
+         margins = self.fundamental.get('margin_analysis', {})
+         if margins.get('operating_margin_trend', 0) < -0.02:
+             red_flags['warnings'].append("Contracting operating margins")
+
+         # Check debt
+         health = self.fundamental.get('financial_health', {})
+         interest_coverage = health.get('interest_coverage')
+         if interest_coverage and interest_coverage < 2:
+             red_flags['critical_issues'].append("Low interest coverage (<2x)")
+
+         # Check valuation
+         valuation_score = self.score_valuation()
+         if valuation_score['percentage'] < 25:
+             red_flags['warnings'].append("Expensive valuation")
+
+         # Check sector
+         sector_sentiment = self.sector.get('sector_sentiment', {})
+         if 'Negative' in sector_sentiment.get('overall_sentiment', ''):
+             red_flags['warnings'].append("Sector facing headwinds")
+
+         # Positive signs
+         if cf_analysis.get('fcf_positive_trend', False):
+             red_flags['positive_signs'].append("Growing free cash flow")
+
+         if margins.get('operating_leverage', False):
+             red_flags['positive_signs'].append("Expanding operating margins")
+
+         returns = self.fundamental.get('returns_analysis', {})
+         if returns.get('roic', 0) > 0.15:
+             red_flags['positive_signs'].append("Strong return on invested capital (>15%)")
+
+         return red_flags
+
+     def generate_confidence_score(self) -> Dict:
+         """
+         Generate confidence level in recommendation
+
+         Returns:
+             Confidence metrics
+         """
+         confidence_factors = []
+
+         # Data quality
+         fundamental = self.score_fundamentals()
+         if fundamental['grade'] != 'F':
+             confidence_factors.append(0.3)
+
+         # Sector data available
+         if self.sector.get('peer_count', 0) > 0:
+             confidence_factors.append(0.25)
+
+         # Valuation methods available
+         valuation = self.valuation.get('relative_valuation', {})
+         methods_available = sum([
+             'pe_valuation' in valuation,
+             'peg_valuation' in valuation,
+             'pb_valuation' in valuation
+         ])
+         confidence_factors.append(methods_available * 0.15)
+
+         # Score consistency
+         overall = self.calculate_overall_score()
+         breakdown = overall['breakdown']
+         scores = [
+             breakdown['fundamental']['score'],
+             breakdown['sector']['score'],
+             breakdown['valuation']['score']
+         ]
+         std_dev = np.std(scores)
+         if std_dev < 15:  # Scores are consistent
+             confidence_factors.append(0.15)
+
+         confidence = sum(confidence_factors)
+
+         return {
+             'confidence_score': min(confidence, 1.0),
+             'confidence_level': self._get_confidence_level(confidence),
+             'factors': confidence_factors
+         }
+
+     def _get_grade(self, score: float) -> str:
+         """Convert score to letter grade"""
+         if score >= 90:
+             return 'A+'
+         elif score >= 80:
+             return 'A'
+         elif score >= 70:
+             return 'B'
+         elif score >= 60:
+             return 'C'
+         elif score >= 50:
+             return 'D'
+         else:
+             return 'F'
+
+     def _get_confidence_level(self, confidence: float) -> str:
+         """Convert confidence score to level"""
+         if confidence >= 0.8:
+             return "Very High"
+         elif confidence >= 0.6:
+             return "High"
+         elif confidence >= 0.4:
+             return "Moderate"
+         else:
+             return "Low"
+
+     def _assess_fundamentals(self, score: float) -> str:
+         """Assess fundamental strength"""
+         if score >= 80:
+             return "Excellent fundamentals - Strong business"
+         elif score >= 60:
+             return "Good fundamentals - Solid business"
+         elif score >= 40:
+             return "Moderate fundamentals - Average business"
+         else:
+             return "Weak fundamentals - Concerning business"
+
+     def _assess_sector_position(self, score: float, sentiment: str) -> str:
+         """Assess sector position"""
+         if score >= 70:
+             return f"Leading position in sector ({sentiment})"
+         elif score >= 50:
+             return f"Average position in sector ({sentiment})"
+         else:
+             return f"Weak position in sector ({sentiment})"
+
+     def _assess_valuation(self, score: float) -> str:
+         """Assess valuation"""
+         if score >= 70:
+             return "Attractive valuation - Undervalued"
+         elif score >= 50:
+             return "Fair valuation - Reasonably priced"
+         elif score >= 30:
+             return "Full valuation - Fairly valued to slightly expensive"
+         else:
+             return "Expensive valuation - Overvalued"
+
+     def generate_investment_thesis(self) -> str:
+         """
+         Generate investment thesis narrative
+
+         Returns:
+             Investment thesis text
+         """
+         recommendation = self.generate_recommendation()
+         overall = self.calculate_overall_score()
+         red_flags = self.identify_red_flags()
+
+         thesis = []
+
+         # Opening
+         thesis.append(f"**{recommendation}** - Overall Score: {overall['overall_score']:.1f}/100 ({overall['grade']})")
+         thesis.append("")
+
+         # Fundamental assessment
+         fund_score = overall['breakdown']['fundamental']['details']
+         thesis.append(f"**Fundamentals ({fund_score['percentage']:.0f}/100):** {fund_score['assessment']}")
+
+         # Sector position
+         sector_score = overall['breakdown']['sector']['details']
+         thesis.append(f"**Sector Position ({sector_score['percentage']:.0f}/100):** {sector_score['assessment']}")
+
+         # Valuation
+         val_score = overall['breakdown']['valuation']['details']
+         thesis.append(f"**Valuation ({val_score['percentage']:.0f}/100):** {val_score['assessment']}")
+         thesis.append("")
+
+         # Key positives
+         if red_flags['positive_signs']:
+             thesis.append("**Key Strengths:**")
+             for sign in red_flags['positive_signs']:
+                 thesis.append(f"  ✓ {sign}")
+             thesis.append("")
+
+         # Key concerns
+         if red_flags['critical_issues'] or red_flags['warnings']:
+             thesis.append("**Key Concerns:**")
+             for issue in red_flags['critical_issues']:
+                 thesis.append(f"  ⚠ {issue}")
+             for warning in red_flags['warnings']:
+                 thesis.append(f"  • {warning}")
+             thesis.append("")
+
+         # Confidence
+         confidence = self.generate_confidence_score()
+         thesis.append(f"**Confidence Level:** {confidence['confidence_level']} ({confidence['confidence_score']*100:.0f}%)")
+
+         return "\n".join(thesis)
+
+     def generate_decision_report(self) -> Dict:
+         """
+         Generate comprehensive investment decision report
+
+         Returns:
+             Complete decision analysis
+         """
+         return {
+             'ticker': self.ticker,
+             'company_name': self.company_info.get('company_name', self.ticker),
+             'current_price': self.current_price,
+             'analysis_date': datetime.now().isoformat(),
+             'recommendation': self.generate_recommendation(),
+             'overall_score': self.calculate_overall_score(),
+             'confidence': self.generate_confidence_score(),
+             'red_flags': self.identify_red_flags(),
+             'investment_thesis': self.generate_investment_thesis()
+         }
+
+
+ if __name__ == "__main__":
+     print("This module is meant to be imported and used with results from other analyzers")
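To make the 35/25/40 weighting concrete, a quick worked example with made-up component scores on the engine's 0-100 scale:

    # Made-up component scores; mirrors calculate_overall_score above
    weights = {'fundamental': 0.35, 'sector': 0.25, 'valuation': 0.40}
    scores = {'fundamental': 70, 'sector': 60, 'valuation': 55}
    overall = sum(scores[k] * weights[k] for k in weights)
    print(overall)  # 0.35*70 + 0.25*60 + 0.40*55 = 61.5

A 61.5 lands in the 60-70 band, so generate_recommendation would return "BUY" unless identify_red_flags reports a critical issue, which downgrades it to "HOLD".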
src/fundamental_analysis/decision_maker.py ADDED
@@ -0,0 +1,620 @@
+ """
+ Investment Decision Maker
+ Main entry point: Input ticker → Output BUY/SELL/HOLD recommendation
+ Uses composite scoring based on institutional methodology
+ """
+
+ import pandas as pd
+ import numpy as np
+ from typing import Dict, Tuple, Optional, List
+ from fundamental_analysis.calculator import calculate_metrics_for_ticker
+ from fundamental_analysis.sector_analyzer import SectorAnalyzer
+ import yfinance as yf
+ import warnings
+ warnings.filterwarnings('ignore')
+
+
+ class InvestmentDecision:
+     """Make BUY/SELL/HOLD decisions based on comprehensive analysis"""
+
+     # Scoring weights based on institutional methodology
+     WEIGHTS = {
+         'fcf_yield': 0.25,             # 25% - HIGHEST priority
+         'roic': 0.25,                  # 25% - Quality indicator
+         'growth': 0.15,                # 15% - Growth metrics
+         'valuation': 0.15,             # 15% - Valuation ratios
+         'leverage': 0.10,              # 10% - Financial health
+         'capital_allocation': 0.10     # 10% - Capital management
+     }
+
+     # Decision thresholds
+     THRESHOLDS = {
+         'buy': 0.35,     # Score >= 0.35 → BUY
+         'sell': -0.10    # Score < -0.10 → SELL
+     }
+
+     def __init__(self, ticker: str, compare_to_sector: bool = True):
+         """
+         Initialize decision maker
+
+         Args:
+             ticker: Stock ticker symbol
+             compare_to_sector: Whether to compare against sector peers
+         """
+         self.ticker = ticker.upper()
+         self.compare_to_sector = compare_to_sector
+         self.metrics_df = pd.DataFrame()
+         self.metrics_dict = {}
+         self.sector_comparison = pd.DataFrame()
+         self.sector_percentiles = {}
+         self.scores = {}
+         self.final_score = 0.0
+         self.recommendation = "HOLD"
+         self.confidence = 0.0
+         self.reasoning = []
+
+     def analyze(self) -> Dict:
+         """
+         Perform complete analysis and generate recommendation
+
+         Returns:
+             Dictionary with recommendation and detailed analysis
+         """
+         print(f"\n{'='*80}")
+         print(f"INVESTMENT ANALYSIS: {self.ticker}")
+         print(f"{'='*80}\n")
+
+         # Step 1: Calculate all metrics
+         print("Step 1: Calculating financial metrics...")
+         self.metrics_df, summary = calculate_metrics_for_ticker(self.ticker)
+
+         if self.metrics_df.empty:
+             return self._error_result("Failed to fetch data")
+
+         # Convert metrics to dictionary for easier access
+         self._build_metrics_dict()
+
+         # Step 2: Get sector comparison
+         if self.compare_to_sector:
+             print("\nStep 2: Comparing to sector peers...")
+             self._compare_to_sector()
+         else:
+             print("\nStep 2: Skipping sector comparison")
+
+         # Step 3: Score each category
+         print("\nStep 3: Scoring investment criteria...")
+         self._score_all_categories()
+
+         # Step 4: Calculate final score and recommendation
+         print("\nStep 4: Generating recommendation...")
+         self._calculate_final_score()
+         self._determine_recommendation()
+
+         # Step 5: Build reasoning
+         self._build_reasoning()
+
+         # Return complete analysis
+         return self._build_result()
+
+     def _build_metrics_dict(self):
+         """Convert metrics DataFrame to dictionary for easier access"""
+         for _, row in self.metrics_df.iterrows():
+             if row['Status'] == 'Available':
+                 metric_name = row['Metric'].replace(' ', '_').replace('/', '_').replace('(', '').replace(')', '').replace('%', 'pct')
+                 self.metrics_dict[metric_name] = row['Value']
+
+     def _get_metric(self, metric_name: str) -> Optional[float]:
+         """Safely get metric value"""
+         row = self.metrics_df[self.metrics_df['Metric'] == metric_name]
+         if not row.empty and row.iloc[0]['Status'] == 'Available':
+             return row.iloc[0]['Value']
+         return None
+
+     def _compare_to_sector(self):
+         """Compare stock to sector peers"""
+         try:
+             # Get company info to determine sector
+             stock = yf.Ticker(self.ticker)
+             info = stock.info
+             sector = info.get('sector', 'Unknown')
+
+             if sector == 'Unknown':
+                 print("  ⚠ Could not determine sector, skipping comparison")
+                 return
+
+             print(f"  Sector: {sector}")
+
+             # Map yfinance sectors to our predefined sectors
+             sector_mapping = {
+                 'Technology': 'Technology',
+                 'Financial Services': 'Financial',
+                 'Healthcare': 'Healthcare',
+                 'Consumer Cyclical': 'Consumer',
+                 'Consumer Defensive': 'Consumer',
+                 'Industrials': 'Industrial',
+                 'Energy': 'Energy',
+                 'Basic Materials': 'Materials',
+                 'Real Estate': 'Real Estate',
+                 'Communication Services': 'Communication'
+             }
+
+             mapped_sector = sector_mapping.get(sector)
+             if not mapped_sector:
+                 print(f"  ⚠ Sector '{sector}' not in predefined list")
+                 return
+
+             # Analyze sector with a subset of stocks (to avoid long wait)
+             analyzer = SectorAnalyzer(mapped_sector)
+             tickers = analyzer.get_sector_tickers()
+
+             # Limit to 10 stocks for faster analysis
+             if len(tickers) > 10:
+                 tickers = tickers[:10]
+                 print(f"  Analyzing {len(tickers)} peer stocks (limited sample)...")
+
+             comparison_df = analyzer.calculate_sector_metrics(tickers)
+
+             if not comparison_df.empty and self.ticker in comparison_df.index:
+                 self.sector_comparison = analyzer.compare_stock_to_peers(self.ticker, comparison_df)
+                 self.sector_percentiles = comparison_df.rank(pct=True).loc[self.ticker].to_dict()
+                 print("  ✓ Sector comparison complete")
+             else:
+                 print("  ⚠ Could not compare to sector")
+
+         except Exception as e:
+             print(f"  ⚠ Sector comparison error: {str(e)}")
+
+     def _score_all_categories(self):
+         """Score each investment category"""
+         self.scores['fcf_yield'] = self._score_fcf_yield()
+         self.scores['roic'] = self._score_roic()
+         self.scores['growth'] = self._score_growth()
+         self.scores['valuation'] = self._score_valuation()
+         self.scores['leverage'] = self._score_leverage()
+         self.scores['capital_allocation'] = self._score_capital_allocation()
+
+         # Print scores
+         for category, score in self.scores.items():
+             print(f"  {category.replace('_', ' ').title()}: {score:+.2f}")
+
+     def _score_fcf_yield(self) -> float:
+         """
+         Score FCF Yield (HIGHEST PRIORITY - 25% weight)
+         Threshold: >6% = BUY, 4-6% = HOLD, <3% = SELL
+         """
+         fcf_yield = self._get_metric('FCF Yield (Enterprise) %')
+
+         if fcf_yield is None:
+             return 0.0
+
+         # Scoring rules
+         if fcf_yield >= 6.0:
+             score = 1.0   # Strong buy signal
+         elif fcf_yield >= 4.0:
+             score = 0.5   # Hold
+         elif fcf_yield >= 3.0:
+             score = 0.0   # Neutral
+         else:
+             score = -1.0  # Sell signal
+
+         # Adjust based on sector percentile if available
+         if 'FCF_Yield_%' in self.sector_percentiles:
+             percentile = self.sector_percentiles['FCF_Yield_%']
+             if percentile > 0.75:
+                 score += 0.3  # Top quartile bonus
+             elif percentile < 0.25:
+                 score -= 0.3  # Bottom quartile penalty
+
+         return np.clip(score, -1.0, 1.0)
+
+     def _score_roic(self) -> float:
+         """
+         Score ROIC (VERY HIGH PRIORITY - 25% weight)
+         Threshold: >15% = excellent, >10% = good, <10% = concern
+         """
+         roic = self._get_metric('Return on Invested Capital (ROIC) %')
+
+         if roic is None:
+             return 0.0
+
+         # Scoring rules
+         if roic >= 20.0:
+             score = 1.0   # Exceptional
+         elif roic >= 15.0:
+             score = 0.7   # Excellent
+         elif roic >= 10.0:
+             score = 0.3   # Good
+         elif roic >= 5.0:
+             score = -0.3  # Mediocre
+         else:
+             score = -1.0  # Poor
+
+         # Adjust based on sector percentile
+         if 'ROIC_%' in self.sector_percentiles:
+             percentile = self.sector_percentiles['ROIC_%']
+             if percentile > 0.75:
+                 score += 0.2
+             elif percentile < 0.25:
+                 score -= 0.2
+
+         return np.clip(score, -1.0, 1.0)
+
+     def _score_growth(self) -> float:
+         """
+         Score Growth Metrics (15% weight)
+         Revenue growth, EPS growth
+         """
+         rev_growth = self._get_metric('Revenue Growth (YoY) %')
+         eps_growth = self._get_metric('EPS Growth (YoY) %')
+
+         if rev_growth is None and eps_growth is None:
+             return 0.0
+
+         scores = []
+
+         # Revenue growth scoring
+         if rev_growth is not None:
+             if rev_growth >= 20.0:
+                 scores.append(1.0)
+             elif rev_growth >= 10.0:
+                 scores.append(0.5)
+             elif rev_growth >= 5.0:
+                 scores.append(0.2)
+             elif rev_growth >= 0.0:
+                 scores.append(-0.2)
+             else:
+                 scores.append(-1.0)  # Declining revenue
+
+         # EPS growth scoring
+         if eps_growth is not None:
+             if eps_growth >= 20.0:
+                 scores.append(1.0)
+             elif eps_growth >= 10.0:
+                 scores.append(0.5)
+             elif eps_growth >= 5.0:
+                 scores.append(0.2)
+             elif eps_growth >= 0.0:
+                 scores.append(-0.2)
+             else:
+                 scores.append(-1.0)
+
+         return np.clip(np.mean(scores) if scores else 0.0, -1.0, 1.0)
+
+     def _score_valuation(self) -> float:
+         """
+         Score Valuation Metrics (15% weight)
+         P/E, PEG, EV/EBITDA relative to sector
+         """
+         pe_ratio = self._get_metric('P/E Ratio (TTM)')
+         peg_ratio = self._get_metric('PEG Ratio')
+         ev_ebitda = self._get_metric('EV/EBITDA')
+
+         scores = []
+
+         # PEG ratio scoring (most important valuation metric)
+         if peg_ratio is not None:
+             if peg_ratio < 0.8:
+                 scores.append(1.0)   # Undervalued
+             elif peg_ratio < 1.2:
+                 scores.append(0.3)   # Fair value
+             elif peg_ratio < 1.5:
+                 scores.append(-0.3)  # Slightly expensive
+             else:
+                 scores.append(-1.0)  # Overvalued
+
+         # P/E relative to sector
+         if 'PE_Ratio' in self.sector_percentiles:
+             percentile = self.sector_percentiles['PE_Ratio']
+             # Lower P/E is better (reverse percentile)
+             if percentile < 0.33:
+                 scores.append(0.7)   # Cheap relative to sector
+             elif percentile < 0.67:
+                 scores.append(0.0)   # Fair
+             else:
+                 scores.append(-0.7)  # Expensive
+
+         # EV/EBITDA relative to sector
+         if 'EV_EBITDA' in self.sector_percentiles:
+             percentile = self.sector_percentiles['EV_EBITDA']
+             # Lower is better
+             if percentile < 0.33:
+                 scores.append(0.5)
+             elif percentile > 0.67:
+                 scores.append(-0.5)
+
+         return np.clip(np.mean(scores) if scores else 0.0, -1.0, 1.0)
+
+     def _score_leverage(self) -> float:
+         """
+         Score Leverage/Financial Health (10% weight)
+         Net Debt/EBITDA, Interest Coverage
+         """
+         net_debt_ebitda = self._get_metric('Net Debt / EBITDA')
+         current_ratio = self._get_metric('Current Ratio')
+         cash_conversion = self._get_metric('Cash Conversion Ratio')
+
+         scores = []
+
+         # Net Debt/EBITDA scoring
+         if net_debt_ebitda is not None:
+             if net_debt_ebitda < 1.0:
+                 scores.append(1.0)   # Very low leverage
+             elif net_debt_ebitda < 2.0:
+                 scores.append(0.5)   # Moderate
+             elif net_debt_ebitda < 3.0:
+                 scores.append(0.0)   # Acceptable
+             elif net_debt_ebitda < 4.0:
+                 scores.append(-0.5)  # High
+             else:
+                 scores.append(-1.0)  # Very high risk
+
+         # Current ratio
+         if current_ratio is not None:
+             if current_ratio >= 2.0:
+                 scores.append(0.5)
+             elif current_ratio >= 1.5:
+                 scores.append(0.2)
+             elif current_ratio >= 1.0:
+                 scores.append(-0.2)
+             else:
+                 scores.append(-0.5)
+
+         # Cash conversion (quality of earnings)
+         if cash_conversion is not None:
+             if cash_conversion >= 1.2:
+                 scores.append(0.5)
+             elif cash_conversion >= 1.0:
+                 scores.append(0.2)
+             elif cash_conversion >= 0.8:
+                 scores.append(-0.2)
+             else:
+                 scores.append(-0.5)  # Red flag
+
+         return np.clip(np.mean(scores) if scores else 0.0, -1.0, 1.0)
+
+     def _score_capital_allocation(self) -> float:
+         """
+         Score Capital Allocation (10% weight)
+         Dividends, buybacks, total payout ratio
+         """
+         payout_ratio = self._get_metric('Payout Ratio %')
+         total_payout = self._get_metric('Total Payout Ratio %')
+         roe = self._get_metric('Return on Equity (ROE) %')
+
+         scores = []
+
+         # Payout ratio - should be sustainable
+         if payout_ratio is not None:
+             if payout_ratio < 40.0:
+                 scores.append(0.5)   # Low, room to grow
+             elif payout_ratio < 60.0:
+                 scores.append(0.3)   # Sustainable
+             elif payout_ratio < 80.0:
+                 scores.append(-0.2)  # High
+             else:
+                 scores.append(-0.5)  # Potentially unsustainable
+
+         # Total payout (dividends + buybacks)
+         if total_payout is not None and roe is not None:
+             # Good capital allocation returns cash to shareholders while maintaining high ROE
+             if roe > 15.0 and total_payout > 50.0:
+                 scores.append(0.5)   # Strong returns + returning cash
+             elif roe > 15.0:
+                 scores.append(0.3)   # Strong returns, could return more
+             elif total_payout > 50.0:
+                 scores.append(-0.3)  # Returning cash but weak returns
+
+         return np.clip(np.mean(scores) if scores else 0.0, -1.0, 1.0)
+
+     def _calculate_final_score(self):
+         """Calculate weighted final score"""
+         self.final_score = sum(
+             self.scores.get(category, 0.0) * weight
+             for category, weight in self.WEIGHTS.items()
+         )
+
+         # Calculate confidence based on data availability and sector comparison
+         data_completeness = len([s for s in self.scores.values() if s != 0.0]) / len(self.scores)
+         sector_bonus = 0.15 if not self.sector_comparison.empty else 0.0
+         self.confidence = min(data_completeness + sector_bonus, 1.0)
+
+     def _determine_recommendation(self):
+         """Determine BUY/SELL/HOLD based on final score"""
+         if self.final_score >= self.THRESHOLDS['buy']:
+             self.recommendation = "BUY"
+         elif self.final_score < self.THRESHOLDS['sell']:
+             self.recommendation = "SELL"
+         else:
+             self.recommendation = "HOLD"
+
+     def _build_reasoning(self):
+         """Build human-readable reasoning for the recommendation"""
+         self.reasoning = []
+
+         # Overall assessment
+         if self.final_score >= 0.5:
+             self.reasoning.append("✓ Strong overall fundamentals")
+         elif self.final_score >= 0.2:
+             self.reasoning.append("✓ Positive fundamentals")
+         elif self.final_score >= -0.2:
+             self.reasoning.append("• Mixed fundamentals")
+         else:
+             self.reasoning.append("✗ Weak fundamentals")
+
+         # FCF Yield (explicit None checks so a legitimate 0.0 reading is still reported)
+         fcf_yield = self._get_metric('FCF Yield (Enterprise) %')
+         if fcf_yield is not None:
+             if fcf_yield >= 6.0:
+                 self.reasoning.append(f"✓ Excellent FCF yield: {fcf_yield:.2f}%")
+             elif fcf_yield < 3.0:
+                 self.reasoning.append(f"✗ Low FCF yield: {fcf_yield:.2f}%")
+
+         # ROIC
+         roic = self._get_metric('Return on Invested Capital (ROIC) %')
+         if roic is not None:
+             if roic >= 15.0:
+                 self.reasoning.append(f"✓ Strong ROIC: {roic:.2f}%")
+             elif roic < 10.0:
+                 self.reasoning.append(f"✗ Weak ROIC: {roic:.2f}%")
+
+         # Growth
+         rev_growth = self._get_metric('Revenue Growth (YoY) %')
+         if rev_growth is not None:
+             if rev_growth >= 15.0:
+                 self.reasoning.append(f"✓ Strong revenue growth: {rev_growth:.2f}%")
+             elif rev_growth < 0:
+                 self.reasoning.append(f"✗ Declining revenue: {rev_growth:.2f}%")
+
+         # Valuation
+         peg_ratio = self._get_metric('PEG Ratio')
+         if peg_ratio is not None:
+             if peg_ratio < 0.8:
+                 self.reasoning.append(f"✓ Undervalued (PEG: {peg_ratio:.2f})")
+             elif peg_ratio > 1.5:
+                 self.reasoning.append(f"✗ Overvalued (PEG: {peg_ratio:.2f})")
+
+         # Leverage
+         net_debt_ebitda = self._get_metric('Net Debt / EBITDA')
+         if net_debt_ebitda is not None:
+             if net_debt_ebitda < 1.0:
+                 self.reasoning.append(f"✓ Low leverage: {net_debt_ebitda:.2f}x")
+             elif net_debt_ebitda > 3.0:
+                 self.reasoning.append(f"✗ High leverage: {net_debt_ebitda:.2f}x")
+
+         # Sector comparison
+         if 'ROIC_%' in self.sector_percentiles:
+             roic_pct = self.sector_percentiles['ROIC_%']
+             if roic_pct > 0.75:
+                 self.reasoning.append(f"✓ Top quartile ROIC vs peers (P{int(roic_pct*100)})")
+             elif roic_pct < 0.25:
+                 self.reasoning.append(f"✗ Bottom quartile ROIC vs peers (P{int(roic_pct*100)})")
+
+     def _build_result(self) -> Dict:
+         """Build final result dictionary"""
+         return {
+             'ticker': self.ticker,
+             'recommendation': self.recommendation,
+             'final_score': self.final_score,
+             'confidence': self.confidence,
+             'category_scores': self.scores,
+             'reasoning': self.reasoning,
+             'key_metrics': {
+                 'FCF_Yield_%': self._get_metric('FCF Yield (Enterprise) %'),
+                 'ROIC_%': self._get_metric('Return on Invested Capital (ROIC) %'),
+                 'ROE_%': self._get_metric('Return on Equity (ROE) %'),
+                 'Revenue_Growth_%': self._get_metric('Revenue Growth (YoY) %'),
+                 'PEG_Ratio': self._get_metric('PEG Ratio'),
+                 'Net_Debt_EBITDA': self._get_metric('Net Debt / EBITDA'),
+             },
+             'sector_percentiles': self.sector_percentiles if self.sector_percentiles else None
+         }
+
+     def _error_result(self, error_message: str) -> Dict:
+         """Return error result"""
+         return {
+             'ticker': self.ticker,
+             'recommendation': 'ERROR',
+             'error': error_message,
+             'final_score': 0.0,
+             'confidence': 0.0
+         }
+
+     def print_analysis(self, result: Dict):
+         """Print formatted analysis report"""
+         print(f"\n{'='*80}")
+         print(f"INVESTMENT RECOMMENDATION: {result['ticker']}")
+         print(f"{'='*80}")
+
+         if result['recommendation'] == 'ERROR':
+             print(f"\n✗ ERROR: {result.get('error', 'Unknown error')}")
+             return
+
+         # Recommendation with color coding
+         rec = result['recommendation']
+         if rec == 'BUY':
+             print(f"\n🟢 RECOMMENDATION: {rec} (Score: {result['final_score']:+.2f})")
+         elif rec == 'SELL':
+             print(f"\n🔴 RECOMMENDATION: {rec} (Score: {result['final_score']:+.2f})")
+         else:
+             print(f"\n🟡 RECOMMENDATION: {rec} (Score: {result['final_score']:+.2f})")
+
+         print(f"Confidence: {result['confidence']:.0%}")
+
+         # Category scores
+         print(f"\n{'-'*80}")
+         print("CATEGORY SCORES (weighted)")
+         print(f"{'-'*80}")
+         for category, score in result['category_scores'].items():
+             weight = self.WEIGHTS[category]
+             weighted_score = score * weight
+             bar_length = int(abs(score) * 20)
+             bar = '█' * bar_length
+             print(f"{category.replace('_', ' ').title():25} {score:+.2f} ({weight:.0%}) → {weighted_score:+.3f} {bar}")
+
+         # Key metrics
+         print(f"\n{'-'*80}")
+         print("KEY METRICS")
+         print(f"{'-'*80}")
+         for metric, value in result['key_metrics'].items():
+             if value is not None:
+                 print(f"{metric:30} {value:>12.2f}")
+
+         # Reasoning
+         print(f"\n{'-'*80}")
+         print("INVESTMENT RATIONALE")
+         print(f"{'-'*80}")
+         for reason in result['reasoning']:
+             print(f"  {reason}")
+
+         print(f"\n{'='*80}\n")
+
+
+ def evaluate_stock(ticker: str, compare_to_sector: bool = True) -> Dict:
+     """
+     Main function to evaluate a stock and get recommendation
+
+     Args:
+         ticker: Stock ticker symbol
+         compare_to_sector: Whether to compare to sector peers
+
+     Returns:
+         Dictionary with recommendation and analysis
+     """
+     decision = InvestmentDecision(ticker, compare_to_sector)
+     result = decision.analyze()
+     decision.print_analysis(result)
+     return result
+
+
+ if __name__ == "__main__":
+     # Test with sample tickers
+     test_tickers = ['AAPL', 'GOOGL', 'MSFT']
+
+     print("INVESTMENT DECISION MAKER TEST")
+     print("=" * 80)
+
+     results = []
+     for ticker in test_tickers:
+         result = evaluate_stock(ticker, compare_to_sector=False)  # Disable sector for speed
+         results.append(result)
+
+     # Summary comparison
+     print("\n" + "=" * 80)
+     print("SUMMARY COMPARISON")
+     print("=" * 80)
+
+     summary_df = pd.DataFrame([{
+         'Ticker': r['ticker'],
+         'Recommendation': r['recommendation'],
+         'Score': r['final_score'],
+         'Confidence': r['confidence'],
+         'FCF Yield %': r['key_metrics']['FCF_Yield_%'],
+         'ROIC %': r['key_metrics']['ROIC_%'],
+         'PEG': r['key_metrics']['PEG_Ratio']
+     } for r in results])
+
+     print(summary_df.to_string(index=False))
+
+     # Save results
+     summary_df.to_csv('investment_recommendations.csv', index=False)
+     print("\n✓ Results saved to investment_recommendations.csv")
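As a usage note, the module's entry point can be driven in a few lines (ticker choice illustrative; sector comparison disabled here to skip the peer-download step, mirroring the test block above):

    # Hypothetical call into the decision maker
    from fundamental_analysis.decision_maker import evaluate_stock

    result = evaluate_stock("NVDA", compare_to_sector=False)
    print(result['recommendation'], f"{result['final_score']:+.2f}")

Per the THRESHOLDS above, a final score >= 0.35 maps to BUY, a score below -0.10 to SELL, and anything between to HOLD.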
src/fundamental_analysis/financial_analyzer.py ADDED
@@ -0,0 +1,382 @@
1
+ """
2
+ Financial analysis engine for fundamental metrics.
3
+ Calculates growth, margins, returns, and cash flow metrics.
4
+ """
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from typing import Dict, Optional, Tuple, List
9
+ from datetime import datetime
10
+
11
+
12
+ class FinancialAnalyzer:
13
+ """Analyzes financial statements and calculates key metrics"""
14
+
15
+ def __init__(self, financial_data: Dict):
16
+ """
17
+ Initialize analyzer with fetched financial data
18
+
19
+ Args:
20
+ financial_data: Complete dataset from FinancialDataFetcher
21
+ """
22
+ self.data = financial_data
23
+ self.ticker = financial_data.get('ticker')
24
+ self.metrics = financial_data.get('metrics', {})
25
+ self.statements = financial_data.get('financial_statements', {})
26
+ self.company_info = financial_data.get('company_info', {})
27
+
28
+ def analyze_growth(self) -> Dict:
29
+ """
30
+ Analyze revenue and earnings growth trends
31
+
32
+ Returns:
33
+ Dictionary with growth metrics
34
+ """
35
+ results = {
36
+ 'revenue_growth_ttm': self.metrics.get('revenue_growth'),
37
+ 'earnings_growth_ttm': self.metrics.get('earnings_growth'),
38
+ }
39
+
40
+ # Calculate historical growth from income statement
41
+ try:
42
+ income_stmt = self.statements.get('income_statement')
43
+ if income_stmt is not None and not income_stmt.empty:
44
+
45
+ # Revenue growth (most recent vs 1 year ago)
46
+ if 'Total Revenue' in income_stmt.index:
47
+ revenues = income_stmt.loc['Total Revenue'].values
48
+ if len(revenues) >= 2:
49
+ results['revenue_growth_yoy'] = ((revenues[0] - revenues[1]) / abs(revenues[1])) if revenues[1] != 0 else None
50
+
51
+ # 3-year CAGR if available
52
+ if len(revenues) >= 3:
53
+ years = min(len(revenues) - 1, 3)
54
+ cagr = (revenues[0] / revenues[years]) ** (1/years) - 1 if revenues[years] != 0 else None
55
+ results['revenue_cagr_3y'] = cagr
56
+
57
+ # Net income growth
58
+ if 'Net Income' in income_stmt.index:
59
+ net_incomes = income_stmt.loc['Net Income'].values
60
+ if len(net_incomes) >= 2:
61
+ # Handle negative values
62
+ if net_incomes[1] != 0:
63
+ results['net_income_growth_yoy'] = ((net_incomes[0] - net_incomes[1]) / abs(net_incomes[1]))
64
+ except Exception as e:
65
+ print(f"Error calculating historical growth: {e}")
66
+
67
+ # Growth assessment
68
+ revenue_growth = results.get('revenue_growth_yoy') or results.get('revenue_growth_ttm')
69
+ earnings_growth = results.get('net_income_growth_yoy') or results.get('earnings_growth_ttm')
70
+
71
+ results['growth_quality'] = self._assess_growth_quality(revenue_growth, earnings_growth)
72
+
73
+ return results
74
+
75
+ def analyze_margins(self) -> Dict:
76
+ """
77
+ Analyze profitability margins and trends
78
+
79
+ Returns:
80
+ Dictionary with margin metrics
81
+ """
82
+ results = {
83
+ 'gross_margin': self.metrics.get('gross_margin'),
84
+ 'operating_margin': self.metrics.get('operating_margin'),
85
+ 'profit_margin': self.metrics.get('profit_margin'),
86
+ 'ebitda_margin': self.metrics.get('ebitda_margin'),
87
+ }
88
+
89
+ # Calculate margin trends from income statement
90
+ try:
91
+ income_stmt = self.statements.get('income_statement')
92
+ if income_stmt is not None and not income_stmt.empty:
93
+
94
+ # Calculate margins for multiple periods
95
+ if 'Total Revenue' in income_stmt.index:
96
+ revenues = income_stmt.loc['Total Revenue'].values
97
+
98
+ # Gross margin trend
99
+ if 'Gross Profit' in income_stmt.index and len(revenues) >= 2:
100
+ gross_profits = income_stmt.loc['Gross Profit'].values
101
+ margins = [gp / rev if rev != 0 else None for gp, rev in zip(gross_profits, revenues)]
102
+ margins = [m for m in margins if m is not None]
103
+
104
+ if len(margins) >= 2:
105
+ results['gross_margin_current'] = margins[0]
106
+ results['gross_margin_trend'] = margins[0] - margins[1]
107
+ results['gross_margin_stable'] = abs(margins[0] - margins[1]) < 0.02 # Within 2%
108
+
109
+ # Operating margin trend
110
+ if 'Operating Income' in income_stmt.index and len(revenues) >= 2:
111
+ op_incomes = income_stmt.loc['Operating Income'].values
112
+ margins = [oi / rev if rev != 0 else None for oi, rev in zip(op_incomes, revenues)]
113
+ margins = [m for m in margins if m is not None]
114
+
115
+ if len(margins) >= 2:
116
+ results['operating_margin_current'] = margins[0]
117
+ results['operating_margin_trend'] = margins[0] - margins[1]
118
+ results['operating_leverage'] = margins[0] > margins[1] # Expanding margins
119
+
120
+ # Net margin trend
121
+ if 'Net Income' in income_stmt.index and len(revenues) >= 2:
122
+ net_incomes = income_stmt.loc['Net Income'].values
123
+ margins = [ni / rev if rev != 0 else None for ni, rev in zip(net_incomes, revenues)]
124
+ margins = [m for m in margins if m is not None]
125
+
126
+ if len(margins) >= 2:
127
+ results['net_margin_current'] = margins[0]
128
+ results['net_margin_trend'] = margins[0] - margins[1]
129
+
130
+ except Exception as e:
131
+ print(f"Error calculating margin trends: {e}")
132
+
133
+ # Margin assessment
134
+ results['margin_quality'] = self._assess_margin_quality(results)
135
+
136
+ return results
137
+
138
+ def analyze_returns(self) -> Dict:
139
+ """
140
+ Analyze return on capital metrics
141
+
142
+ Returns:
143
+ Dictionary with return metrics
144
+ """
145
+ results = {
146
+ 'roe': self.metrics.get('return_on_equity'),
147
+ 'roa': self.metrics.get('return_on_assets'),
148
+ }
149
+
150
+ # Calculate ROIC (Return on Invested Capital)
151
+ try:
152
+ income_stmt = self.statements.get('income_statement')
153
+ balance_sheet = self.statements.get('balance_sheet')
154
+
155
+ if income_stmt is not None and balance_sheet is not None:
156
+ if not income_stmt.empty and not balance_sheet.empty:
157
+
158
+ # NOPAT = Net Operating Profit After Tax
159
+ if 'Operating Income' in income_stmt.index and 'Tax Provision' in income_stmt.index:
160
+ op_income = income_stmt.loc['Operating Income'].iloc[0]
161
+ total_revenue = income_stmt.loc['Total Revenue'].iloc[0] if 'Total Revenue' in income_stmt.index else 1
162
+ tax_provision = income_stmt.loc['Tax Provision'].iloc[0]
163
+
164
+ # Estimate tax rate
165
+ pretax_income = income_stmt.loc['Pretax Income'].iloc[0] if 'Pretax Income' in income_stmt.index else op_income
166
+ tax_rate = abs(tax_provision / pretax_income) if pretax_income != 0 else 0.21
167
+
168
+ nopat = op_income * (1 - tax_rate)
169
+
170
+ # Invested Capital = Total Debt + Total Equity - Cash
171
+ total_debt = self.metrics.get('total_debt', 0)
172
+ total_assets = balance_sheet.loc['Total Assets'].iloc[0] if 'Total Assets' in balance_sheet.index else 0
173
+ total_liabilities = balance_sheet.loc['Total Liabilities Net Minority Interest'].iloc[0] if 'Total Liabilities Net Minority Interest' in balance_sheet.index else 0
174
+ equity = total_assets - total_liabilities
175
+ cash = self.metrics.get('total_cash', 0)
176
+
177
+ invested_capital = total_debt + equity - cash
178
+
179
+ if invested_capital > 0:
180
+ results['roic'] = nopat / invested_capital
181
+
182
+ except Exception as e:
183
+ print(f"Error calculating ROIC: {e}")
184
+
185
+ # Returns assessment
186
+ results['returns_quality'] = self._assess_returns_quality(results)
187
+
188
+ return results
189
+
190
+ def analyze_cash_flow(self) -> Dict:
191
+ """
192
+ Analyze cash flow metrics
193
+
194
+ Returns:
195
+ Dictionary with cash flow metrics
196
+ """
197
+ results = {
198
+ 'operating_cash_flow': self.metrics.get('operating_cash_flow', 0),
199
+ 'free_cash_flow': self.metrics.get('free_cash_flow', 0),
200
+ }
201
+
202
+ # Calculate FCF margin and conversion
203
+ try:
204
+ income_stmt = self.statements.get('income_statement')
205
+ cashflow_stmt = self.statements.get('cash_flow')
206
+
207
+ if income_stmt is not None and not income_stmt.empty:
208
+ revenue = income_stmt.loc['Total Revenue'].iloc[0] if 'Total Revenue' in income_stmt.index else 0
209
+ net_income = income_stmt.loc['Net Income'].iloc[0] if 'Net Income' in income_stmt.index else 0
210
+
211
+ if revenue > 0:
212
+ results['fcf_margin'] = results['free_cash_flow'] / revenue
213
+ results['ocf_margin'] = results['operating_cash_flow'] / revenue
214
+
215
+ if net_income != 0:
216
+ results['fcf_conversion'] = results['free_cash_flow'] / net_income
217
+
218
+ # Cash flow trend
219
+ if cashflow_stmt is not None and not cashflow_stmt.empty:
220
+ if 'Free Cash Flow' in cashflow_stmt.index:
221
+ fcf_values = cashflow_stmt.loc['Free Cash Flow'].values
222
+ if len(fcf_values) >= 2:
223
+ results['fcf_growth'] = ((fcf_values[0] - fcf_values[1]) / abs(fcf_values[1])) if fcf_values[1] != 0 else None
224
+ results['fcf_positive_trend'] = fcf_values[0] > fcf_values[1]
225
+
226
+ except Exception as e:
227
+ print(f"Error calculating cash flow metrics: {e}")
228
+
229
+ # Cash flow assessment
230
+ results['cash_flow_quality'] = self._assess_cash_flow_quality(results)
231
+
232
+ return results
233
+
234
+ def analyze_financial_health(self) -> Dict:
235
+ """
236
+ Analyze financial health and leverage
237
+
238
+ Returns:
239
+ Dictionary with financial health metrics
240
+ """
241
+ results = {
242
+ 'total_cash': self.metrics.get('total_cash', 0),
243
+ 'total_debt': self.metrics.get('total_debt', 0),
244
+ 'debt_to_equity': self.metrics.get('debt_to_equity'),
245
+ 'current_ratio': self.metrics.get('current_ratio'),
246
+ 'quick_ratio': self.metrics.get('quick_ratio'),
247
+ }
248
+
249
+ # Calculate net debt
250
+ results['net_debt'] = results['total_debt'] - results['total_cash']
251
+
252
+ # Calculate interest coverage
253
+ try:
254
+ income_stmt = self.statements.get('income_statement')
255
+ if income_stmt is not None and not income_stmt.empty:
256
+ if 'Operating Income' in income_stmt.index and 'Interest Expense' in income_stmt.index:
257
+ op_income = income_stmt.loc['Operating Income'].iloc[0]
258
+ interest = abs(income_stmt.loc['Interest Expense'].iloc[0])
259
+
260
+ if interest > 0:
261
+ results['interest_coverage'] = op_income / interest
262
+
263
+ except Exception as e:
264
+ print(f"Error calculating interest coverage: {e}")
265
+
266
+ # Health assessment
267
+ results['financial_health_quality'] = self._assess_financial_health(results)
268
+
269
+ return results
270
+
271
+ def _assess_growth_quality(self, revenue_growth: Optional[float], earnings_growth: Optional[float]) -> str:
272
+ """Assess growth quality"""
273
+ if revenue_growth is None or earnings_growth is None:
274
+ return "Insufficient Data"
275
+
276
+ if revenue_growth > 0.15 and earnings_growth > 0.15:
277
+ return "Strong"
278
+ elif revenue_growth > 0.08 and earnings_growth > 0.08:
279
+ return "Good"
280
+ elif revenue_growth > 0 and earnings_growth > 0:
281
+ return "Moderate"
282
+ else:
283
+ return "Weak"
284
+
285
+ def _assess_margin_quality(self, margins: Dict) -> str:
286
+ """Assess margin quality"""
287
+ operating_margin = margins.get('operating_margin_current') or margins.get('operating_margin')
288
+ margin_trend = margins.get('operating_margin_trend')
289
+
290
+ if operating_margin is None:
291
+ return "Insufficient Data"
292
+
293
+ if operating_margin > 0.20 and (margin_trend is None or margin_trend >= 0):
294
+ return "Excellent"
295
+ elif operating_margin > 0.10 and (margin_trend is None or margin_trend >= 0):
296
+ return "Good"
297
+ elif operating_margin > 0.05:
298
+ return "Moderate"
299
+ else:
300
+ return "Weak"
301
+
302
+ def _assess_returns_quality(self, returns: Dict) -> str:
303
+ """Assess returns quality"""
304
+ roe = returns.get('roe')
305
+ roic = returns.get('roic')
306
+
307
+ primary_return = roic if roic is not None else roe
308
+
309
+ if primary_return is None:
310
+ return "Insufficient Data"
311
+
312
+ if primary_return > 0.20:
313
+ return "Excellent"
314
+ elif primary_return > 0.15:
315
+ return "Good"
316
+ elif primary_return > 0.10:
317
+ return "Moderate"
318
+ else:
319
+ return "Weak"
320
+
321
+ def _assess_cash_flow_quality(self, cash_flow: Dict) -> str:
322
+ """Assess cash flow quality"""
323
+ fcf = cash_flow.get('free_cash_flow', 0)
324
+ fcf_conversion = cash_flow.get('fcf_conversion')
325
+
326
+ if fcf <= 0:
327
+ return "Negative"
328
+
329
+ if fcf_conversion and fcf_conversion > 1.0:
330
+ return "Excellent"
331
+ elif fcf_conversion and fcf_conversion > 0.8:
332
+ return "Good"
333
+ elif fcf > 0:
334
+ return "Moderate"
335
+ else:
336
+ return "Weak"
337
+
338
+ def _assess_financial_health(self, health: Dict) -> str:
339
+ """Assess financial health"""
340
+ net_debt = health.get('net_debt', 0)
341
+ current_ratio = health.get('current_ratio')
342
+ interest_coverage = health.get('interest_coverage')
343
+
344
+ # Net cash position is excellent
345
+ if net_debt < 0:
346
+ return "Excellent"
347
+
348
+ # Check liquidity and debt coverage
349
+ healthy_liquidity = current_ratio and current_ratio > 1.5
350
+ healthy_coverage = interest_coverage and interest_coverage > 3
351
+
352
+ if healthy_liquidity and healthy_coverage:
353
+ return "Good"
354
+ elif (current_ratio and current_ratio > 1.0) or (interest_coverage and interest_coverage > 1.5):
355
+ return "Moderate"
356
+ else:
357
+ return "Weak"
358
+
359
+ def generate_summary(self) -> Dict:
360
+ """
361
+ Generate comprehensive analysis summary
362
+
363
+ Returns:
364
+ Complete analysis results
365
+ """
366
+ return {
367
+ 'ticker': self.ticker,
368
+ 'company_name': self.company_info.get('company_name', self.ticker),
369
+ 'sector': self.company_info.get('sector', 'Unknown'),
370
+ 'industry': self.company_info.get('industry', 'Unknown'),
371
+ 'analysis_date': datetime.now().isoformat(),
372
+ 'growth_analysis': self.analyze_growth(),
373
+ 'margin_analysis': self.analyze_margins(),
374
+ 'returns_analysis': self.analyze_returns(),
375
+ 'cash_flow_analysis': self.analyze_cash_flow(),
376
+ 'financial_health': self.analyze_financial_health()
377
+ }
378
+
379
+
380
+ if __name__ == "__main__":
381
+ # Test with sample data
382
+ print("This module is meant to be imported and used with data from data_fetcher.py")
src/fundamental_analysis/main.py ADDED
@@ -0,0 +1,32 @@
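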
1
+ """
2
+ Main Entry Point for Stock Analysis
3
+ Simple interface to analyze stocks and get BUY/SELL/HOLD recommendations
4
+ """
5
+
6
+ from fundamental_analysis.decision_maker import evaluate_stock
7
+ import sys
8
+
9
+
10
+ def main():
11
+ """Main function for stock analysis"""
12
+ # Get ticker from command line or user input
13
+ if len(sys.argv) > 1:
14
+ ticker = sys.argv[1].upper()
15
+ else:
16
+ ticker = input("\nEnter stock ticker (e.g., AAPL, GOOGL, MSFT): ").strip().upper()
17
+ if not ticker:
18
+ print("Error: No ticker provided")
19
+ return
20
+ # Ask about sector comparison
21
+ if len(sys.argv) > 2 and sys.argv[2].lower() in ['--no-sector', '-n']:
22
+ compare_sector = False
23
+ else:
24
+ use_sector = input("\nCompare to sector peers? (Y/n): ").strip().lower()
25
+ compare_sector = use_sector != 'n'
26
+ # Analyze the stock
27
+ result = evaluate_stock(ticker, compare_to_sector=compare_sector)
28
+ return result
29
+
30
+
31
+ if __name__ == "__main__":
32
+ main()
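
Assuming src/ is on PYTHONPATH so the fundamental_analysis package resolves, the script can also be driven non-interactively:

    python -m fundamental_analysis.main AAPL --no-sector

Omitting the arguments falls back to the interactive prompts above.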
src/fundamental_analysis/metrics.py ADDED
@@ -0,0 +1,616 @@
1
+ """
2
+ Individual metric calculation functions.
3
+ Each function is self-contained and follows formulas from stock_evaluation_formulas_and_process.md
4
+ """
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from typing import Dict, Optional, Tuple
9
+
10
+
11
+ # ============================================================================
12
+ # 1. BUILDING BLOCKS & DATA EXTRACTION
13
+ # ============================================================================
14
+
15
+ def get_diluted_shares(info: Dict, financials: Dict) -> Optional[float]:
16
+ """Get diluted shares outstanding"""
17
+ return info.get('sharesOutstanding') or financials.get('shares_outstanding')
18
+
19
+
20
+ def get_eps_ttm(info: Dict) -> Optional[float]:
21
+ """Earnings Per Share - Trailing Twelve Months"""
22
+ return info.get('trailingEps')
23
+
24
+
25
+ def get_eps_forward(info: Dict) -> Optional[float]:
26
+ """Earnings Per Share - Forward (Next 12 months)"""
27
+ return info.get('forwardEps')
28
+
29
+
30
+ def get_book_value_per_share(info: Dict) -> Optional[float]:
31
+ """Book Value Per Share"""
32
+ return info.get('bookValue')
33
+
34
+
35
+ # ============================================================================
36
+ # 2. VALUATION METRICS
37
+ # ============================================================================
38
+
39
+ def calculate_market_cap(price: float, diluted_shares: float) -> Optional[float]:
40
+ """
41
+ Market Capitalization
42
+ Formula: MktCap = Share Price × Diluted Shares
43
+ """
44
+ if type(price) in [int, float] and type(diluted_shares) in [int, float]:
45
+ return price * diluted_shares
46
+ return None
47
+
48
+
49
+ def calculate_enterprise_value(market_cap: float, total_debt: float,
50
+ cash: float, minority_interest: float = 0,
51
+ preferred_stock: float = 0) -> Optional[float]:
52
+ """
53
+ Enterprise Value
54
+ Formula: EV = MktCap + Total Debt + Minority Interest + Preferred Stock − Cash
55
+
56
+ Explanation: Total value of firm's operating assets available to all capital providers
57
+ """
58
+ if type(market_cap) in [int, float] and type(total_debt) in [int, float] and \
59
+ type(cash) in [int, float] and type(minority_interest) in [int, float] and \
60
+ type(preferred_stock) in [int, float]:
61
+ return market_cap + total_debt + minority_interest + preferred_stock - cash
62
+ return None
63
+
64
+
65
+ def calculate_pe_ratio(price: float, eps: float) -> Optional[float]:
66
+ """
67
+ Price-to-Earnings Ratio
68
+ Formula: P/E = Share Price / EPS
69
+
70
+ Explanation: How many dollars investors pay per dollar of earnings
71
+ """
72
+ if type(price) in [int, float] and type(eps) in [int, float]:
73
+ if eps and eps > 0:
74
+ return price / eps
75
+ return None
76
+
77
+
78
+ def calculate_peg_ratio(pe_ratio: float, eps_growth_rate: float) -> Optional[float]:
79
+ """
80
+ PEG Ratio (Price/Earnings-to-Growth)
81
+ Formula: PEG = P/E / (Expected EPS annual growth rate in %)
82
+
83
+ Explanation: Normalizes PE by growth. PEG ≈ 1 implies valuation equals growth
84
+ Threshold: PEG < 1 suggests value, PEG > 1.5 suggests expensive
85
+ """
86
+ if type(pe_ratio) in [int, float] and type(eps_growth_rate) in [int, float]:
87
+ if pe_ratio and eps_growth_rate and eps_growth_rate > 0:
88
+ return pe_ratio / (eps_growth_rate * 100)  # growth is passed as a decimal and scaled to %
89
+ return None
90
+
91
+
92
+ def calculate_ev_ebitda(enterprise_value: float, ebitda: float) -> Optional[float]:
93
+ """
94
+ EV/EBITDA Multiple
95
+ Formula: EV/EBITDA = Enterprise Value / EBITDA
96
+
97
+ Explanation: Capital-structure neutral valuation, useful for comparing
98
+ companies with different leverage
99
+ """
100
+ if type(enterprise_value) in [int, float] and type(ebitda) in [int, float]:
101
+ if ebitda and ebitda > 0:
102
+ return enterprise_value / ebitda
103
+ return None
104
+
105
+
106
+ def calculate_price_to_fcf(market_cap: float, free_cash_flow: float) -> Optional[float]:
107
+ """
108
+ Price to Free Cash Flow
109
+ Formula: P/FCF = Market Cap / Free Cash Flow
110
+
111
+ Explanation: How many dollars investors pay per dollar of free cash flow
112
+ """
113
+ if type(market_cap) in [int, float] and type(free_cash_flow) in [int, float]:
114
+ if free_cash_flow and free_cash_flow > 0:
115
+ return market_cap / free_cash_flow
116
+ return None
117
+
118
+
119
+ def calculate_fcf_yield_equity(free_cash_flow: float, market_cap: float) -> Optional[float]:
120
+ """
121
+ Free Cash Flow Yield (Equity basis)
122
+ Formula: FCF yield = Free Cash Flow / Market Cap
123
+
124
+ Explanation: Cash return on equity investment
125
+ Threshold: > 6% = attractive, < 3% = expensive
126
+ Priority: HIGHEST - measures actual cash generation
127
+ """
128
+ if type(free_cash_flow) in [int, float] and type(market_cap) in [int, float]:
129
+ if market_cap and market_cap > 0:
130
+ return (free_cash_flow / market_cap) * 100 # Return as percentage
131
+ return None
132
+
133
+
134
+ def calculate_fcf_yield_enterprise(free_cash_flow: float, enterprise_value: float) -> Optional[float]:
135
+ """
136
+ Free Cash Flow Yield (Enterprise basis)
137
+ Formula: FCF yield = Free Cash Flow / EV
138
+
139
+ Explanation: Cash return on enterprise value (preferred for comparing
140
+ leveraged vs unleveraged firms)
141
+ Threshold: > 6% = Buy candidate, 4-6% = Hold, < 3% = Unattractive
142
+ """
143
+ if type(free_cash_flow) in [int, float] and type(enterprise_value) in [int, float]:
144
+ if enterprise_value and enterprise_value > 0:
145
+ return (free_cash_flow / enterprise_value) * 100 # Return as percentage
146
+ return None
147
+
148
+
149
+ def calculate_price_to_book(price: float, book_value_per_share: float) -> Optional[float]:
150
+ """
151
+ Price-to-Book Ratio
152
+ Formula: P/B = Price / Book Value per Share
153
+
154
+ Explanation: Valuation relative to net asset value
155
+ """
156
+ if type(price) in [int, float] and type(book_value_per_share) in [int, float]:
157
+ if book_value_per_share and book_value_per_share > 0:
158
+ return price / book_value_per_share
159
+ return None
160
+
161
+
162
+ # ============================================================================
163
+ # 3. PROFITABILITY & MARGIN METRICS
164
+ # ============================================================================
165
+
166
+ def calculate_gross_margin(revenue: float, cogs: float) -> Optional[float]:
167
+ """
168
+ Gross Margin
169
+ Formula: Gross margin = (Revenue − COGS) / Revenue
170
+
171
+ Explanation: Unit economics - whether price > cost. Rising gross margins
172
+ imply pricing power or scale
173
+ """
174
+ if type(revenue) in [int, float] and type(cogs) in [int, float]:
175
+ if revenue and revenue > 0:
176
+ return ((revenue - cogs) / revenue) * 100
177
+ return None
178
+
179
+
180
+ def calculate_ebitda_margin(ebitda: float, revenue: float) -> Optional[float]:
181
+ """
182
+ EBITDA Margin
183
+ Formula: EBITDA margin = EBITDA / Revenue
184
+
185
+ Explanation: Operational cash profitability before capital structure and non-cash charges
186
+ """
187
+ if type(ebitda) in [int, float] and type(revenue) in [int, float]:
188
+ if revenue and revenue > 0:
189
+ return (ebitda / revenue) * 100
190
+ return None
191
+
192
+
193
+ def calculate_ebit_margin(ebit: float, revenue: float) -> Optional[float]:
194
+ """
195
+ EBIT (Operating) Margin
196
+ Formula: EBIT margin = EBIT / Revenue
197
+
198
+ Explanation: Includes depreciation/amortization; useful for capital-intensive businesses
199
+ """
200
+ if type(ebit) in [int, float] and type(revenue) in [int, float]:
201
+ if revenue and revenue > 0:
202
+ return (ebit / revenue) * 100
203
+ return None
204
+
205
+
206
+ def calculate_net_margin(net_income: float, revenue: float) -> Optional[float]:
207
+ """
208
+ Net Margin
209
+ Formula: Net margin = Net Income / Revenue
210
+
211
+ Explanation: True bottom-line profitability after interest and tax
212
+ """
213
+ if type(net_income) in [int, float] and type(revenue) in [int, float]:
214
+ if revenue and revenue > 0:
215
+ return (net_income / revenue) * 100
216
+ return None
217
+
218
+
219
+ # ============================================================================
220
+ # 4. CASH FLOW METRICS
221
+ # ============================================================================
222
+
223
+ def calculate_free_cash_flow(cash_from_operations: float, capex: float) -> Optional[float]:
224
+ """
225
+ Free Cash Flow
226
+ Formula: FCF = Cash from Operations − Capital Expenditures
227
+
228
+ Explanation: Actual cash business generates after reinvestment.
229
+ This is the ULTIMATE value driver.
230
+ """
231
+ if type(cash_from_operations) in [int, float] and type(capex) in [int, float]:
232
+ return cash_from_operations - abs(capex) # CapEx is usually negative
233
+ return None
234
+
235
+
236
+ def calculate_fcf_per_share(free_cash_flow: float, diluted_shares: float) -> Optional[float]:
237
+ """
238
+ Free Cash Flow per Share
239
+ Formula: FCF per share = Free Cash Flow / Diluted Shares
240
+
241
+ Explanation: Per-share cash generation, adjusted for dilution
242
+ """
243
+ if type(free_cash_flow) in [int, float] and type(diluted_shares) in [int, float]:
244
+ if diluted_shares and diluted_shares > 0:
245
+ return free_cash_flow / diluted_shares
246
+ return None
247
+
248
+
249
+ def calculate_cash_conversion(cfo: float, net_income: float) -> Optional[float]:
250
+ """
251
+ Cash Conversion Ratio
252
+ Formula: Cash Conversion = CFO / Net Income
253
+
254
+ Explanation: Measures quality of earnings. Should be > 1.0
255
+ RED FLAG if consistently < 1.0 (profits not converting to cash)
256
+ """
257
+ if type(cfo) in [int, float] and type(net_income) in [int, float]:
258
+ if net_income and net_income != 0:
259
+ return cfo / net_income
260
+ return None
261
+
262
+
263
+ # ============================================================================
264
+ # 5. LIQUIDITY & SOLVENCY METRICS
265
+ # ============================================================================
266
+
267
+ def calculate_current_ratio(current_assets: float, current_liabilities: float) -> Optional[float]:
268
+ """
269
+ Current Ratio
270
+ Formula: Current ratio = Current Assets / Current Liabilities
271
+
272
+ Explanation: Basic short-term liquidity. > 1 is typical
273
+ """
274
+ if type(current_assets) in [int, float] and type(current_liabilities) in [int, float]:
275
+ if current_liabilities and current_liabilities > 0:
276
+ return current_assets / current_liabilities
277
+ return None
278
+
279
+
280
+ def calculate_quick_ratio(cash: float, marketable_securities: float,
281
+ receivables: float, current_liabilities: float) -> Optional[float]:
282
+ """
283
+ Quick Ratio (Acid Test)
284
+ Formula: Quick ratio = (Cash + Marketable Securities + Receivables) / Current Liabilities
285
+
286
+ Explanation: Stricter liquidity measure excluding inventory
287
+ """
288
+ if type(cash) in [int, float] and type(marketable_securities) in [int, float] and \
289
+ type(receivables) in [int, float] and type(current_liabilities) in [int, float]:
290
+ if current_liabilities and current_liabilities > 0:
291
+ return (cash + marketable_securities + receivables) / current_liabilities
292
+ return None
293
+
294
+
295
+ def calculate_net_debt_to_ebitda(total_debt: float, cash: float, ebitda: float) -> Optional[float]:
296
+ """
297
+ Net Debt to EBITDA
298
+ Formula: Net debt / EBITDA = (Total Debt − Cash) / EBITDA
299
+
300
+ Explanation: Years of EBITDA required to pay net debt
301
+ Thresholds:
302
+ - < 1.0: Low risk
303
+ - 1-3: Moderate
304
+ - > 3: High risk (alert if > 4-5)
305
+ """
306
+ if type(total_debt) in [int, float] and type(cash) in [int, float] and type(ebitda) in [int, float]:
307
+ if ebitda and ebitda > 0:
308
+ net_debt = total_debt - cash
309
+ return net_debt / ebitda
310
+ return None
311
+
312
+
313
+ def calculate_interest_coverage(ebit: float, interest_expense: float) -> Optional[float]:
314
+ """
315
+ Interest Coverage Ratio
316
+ Formula: Interest coverage = EBIT / Interest Expense
317
+
318
+ Explanation: Ability to pay interest from operating earnings
319
+ Threshold: > 3x is safe, < 2x is risky
320
+ """
321
+ if type(ebit) in [int, float] and type(interest_expense) in [int, float]:
322
+ if interest_expense and interest_expense > 0:
323
+ return ebit / interest_expense
324
+ return None
325
+
326
+
327
+ def calculate_debt_to_equity(total_debt: float, total_equity: float) -> Optional[float]:
328
+ """
329
+ Debt-to-Equity Ratio
330
+ Formula: Debt-to-Equity = Total Debt / Total Equity
331
+
332
+ Explanation: Capital structure leverage
333
+ """
334
+ if type(total_debt) in [int, float] and type(total_equity) in [int, float]:
335
+ if total_equity and total_equity > 0:
336
+ return total_debt / total_equity
337
+ return None
338
+
339
+
340
+ # ============================================================================
341
+ # 6. RETURN & EFFICIENCY METRICS
342
+ # ============================================================================
343
+
344
+ def calculate_roe(net_income: float, avg_shareholders_equity: float) -> Optional[float]:
345
+ """
346
+ Return on Equity
347
+ Formula: ROE = Net Income / Average Shareholders' Equity
348
+
349
+ Explanation: Returns delivered to equity holders
350
+ Threshold: > 15% is good, > 20% is excellent
351
+ Priority: HIGH - quality indicator
352
+ """
353
+ if type(net_income) in [int, float] and type(avg_shareholders_equity) in [int, float]:
354
+ if avg_shareholders_equity and avg_shareholders_equity > 0:
355
+ return (net_income / avg_shareholders_equity) * 100
356
+ return None
357
+
358
+
359
+ def calculate_roa(net_income: float, total_assets: float) -> Optional[float]:
360
+ """
361
+ Return on Assets
362
+ Formula: ROA = Net Income / Total Assets
363
+
364
+ Explanation: Asset utilization efficiency
365
+ """
366
+ if type(net_income) in [int, float] and type(total_assets) in [int, float]:
367
+ if total_assets and total_assets > 0:
368
+ return (net_income / total_assets) * 100
369
+ return None
370
+
371
+
372
+ def calculate_roic(ebit: float, tax_rate: float, invested_capital: float) -> Optional[float]:
373
+ """
374
+ Return on Invested Capital
375
+ Formula:
376
+ NOPAT = EBIT × (1 − Tax Rate)
377
+ ROIC = NOPAT / Invested Capital
378
+ Where: Invested Capital = Total Equity + Total Debt - Cash
379
+
380
+ Explanation: Operating return on all capital (equity + debt)
381
+ Thresholds:
382
+ - > 10%: Good
383
+ - > 15%: Excellent (high-quality business)
384
+ - Trending up: High-return projects
385
+ Priority: VERY HIGH - best measure of capital efficiency
386
+ """
387
+ if type(ebit) in [int, float] and type(tax_rate) in [int, float] and type(invested_capital) in [int, float]:
388
+ if invested_capital and invested_capital > 0:
389
+ nopat = ebit * (1 - tax_rate)
390
+ return (nopat / invested_capital) * 100
391
+ return None
392
+
393
+
394
+ def calculate_invested_capital(total_equity: float, total_debt: float, cash: float) -> Optional[float]:
395
+ """
396
+ Invested Capital
397
+ Formula: Invested Capital = Total Equity + Total Debt - Cash
398
+
399
+ Explanation: Total capital deployed in operations
400
+ """
401
+ if type(total_equity) in [int, float] and type(total_debt) in [int, float] and type(cash) in [int, float]:
402
+ return total_equity + total_debt - cash
403
+ return None
404
+
405
+
406
+ # ============================================================================
407
+ # 7. GROWTH METRICS
408
+ # ============================================================================
409
+
410
+ def calculate_revenue_growth(current_revenue: float, prior_revenue: float) -> Optional[float]:
411
+ """
412
+ Revenue Growth Rate (YoY)
413
+ Formula: Revenue Growth = (Current Revenue - Prior Revenue) / Prior Revenue
414
+
415
+ Explanation: Top-line growth momentum
416
+ """
417
+ if type(current_revenue) in [int, float] and type(prior_revenue) in [int, float]:
418
+ if prior_revenue and prior_revenue > 0:
419
+ return ((current_revenue - prior_revenue) / prior_revenue) * 100
420
+ return None
421
+
422
+
423
+ def calculate_eps_growth(current_eps: float, prior_eps: float) -> Optional[float]:
424
+ """
425
+ EPS Growth Rate (YoY)
426
+ Formula: EPS Growth = (Current EPS - Prior EPS) / Prior EPS
427
+
428
+ Explanation: Bottom-line growth momentum
429
+ """
430
+ if type(current_eps) in [int, float] and type(prior_eps) in [int, float]:
431
+ if prior_eps and prior_eps > 0:
432
+ return ((current_eps - prior_eps) / prior_eps) * 100
433
+ return None
434
+
435
+
436
+ def calculate_cagr(ending_value: float, beginning_value: float, years: int) -> Optional[float]:
437
+ """
438
+ Compound Annual Growth Rate
439
+ Formula: CAGR = (Ending Value / Beginning Value)^(1/years) - 1
440
+
441
+ Explanation: Smoothed annual growth rate over period
442
+ """
443
+ if type(ending_value) in [int, float] and type(beginning_value) in [int, float] and type(years) in [int, float]:
444
+ if beginning_value and beginning_value > 0 and years > 0:
445
+ return (((ending_value / beginning_value) ** (1 / years)) - 1) * 100
446
+ return None
447
+
448
+
449
+ def calculate_sustainable_growth(roe: float, payout_ratio: float) -> Optional[float]:
450
+ """
451
+ Sustainable EPS Growth
452
+ Formula: Sustainable Growth = ROE × (1 - Payout Ratio)
453
+ Or: Sustainable Growth = ROE × Reinvestment Rate
454
+
455
+ Explanation: Theoretical growth rate based on returns and reinvestment
456
+ """
457
+ if type(roe) in [int, float] and type(payout_ratio) in [int, float]:
458
+ reinvestment_rate = 1 - payout_ratio
459
+ return roe * reinvestment_rate
460
+ return None
461
+
462
+
463
+ # ============================================================================
464
+ # 8. CAPITAL ALLOCATION METRICS
465
+ # ============================================================================
466
+
467
+ def calculate_payout_ratio(dividends: float, net_income: float) -> Optional[float]:
468
+ """
469
+ Payout Ratio
470
+ Formula: Payout Ratio = Dividends / Net Income
471
+
472
+ Explanation: Proportion of earnings paid as dividends
473
+ """
474
+ if type(dividends) in [int, float] and type(net_income) in [int, float]:
475
+ if net_income and net_income > 0:
476
+ return (dividends / net_income) * 100
477
+ return None
478
+
479
+
480
+ def calculate_buyback_yield(buyback_cash: float, market_cap: float) -> Optional[float]:
481
+ """
482
+ Buyback Yield
483
+ Formula: Buyback Yield = Cash spent on repurchases / Market Cap
484
+
485
+ Explanation: Return to shareholders via buybacks
486
+ """
487
+ if type(buyback_cash) in [int, float] and type(market_cap) in [int, float]:
488
+ if market_cap and market_cap > 0:
489
+ return (buyback_cash / market_cap) * 100
490
+ return None
491
+
492
+
493
+ def calculate_dilution_rate(current_shares: float, prior_shares: float) -> Optional[float]:
494
+ """
495
+ Dilution Rate
496
+ Formula: Dilution = (Current Shares - Prior Shares) / Prior Shares
497
+
498
+ Explanation: Share count increase (dilution) or decrease (buybacks)
499
+ Negative value = buybacks, Positive value = dilution
500
+ """
501
+ if type(current_shares) in [int, float] and type(prior_shares) in [int, float]:
502
+ if prior_shares and prior_shares > 0:
503
+ return ((current_shares - prior_shares) / prior_shares) * 100
504
+ return None
505
+
506
+
507
+ def calculate_total_payout_ratio(dividends: float, buybacks: float, net_income: float) -> Optional[float]:
508
+ """
509
+ Total Payout Ratio (including buybacks)
510
+ Formula: Total Payout = (Dividends + Buybacks) / Net Income
511
+
512
+ Explanation: Total cash returned to shareholders
513
+ """
514
+ if type(dividends) in [int, float] and type(buybacks) in [int, float] and type(net_income) in [int, float]:
515
+ if net_income and net_income > 0:
516
+ return ((dividends + buybacks) / net_income) * 100
517
+ return None
518
+
519
+
520
+ # ============================================================================
521
+ # 9. EFFICIENCY METRICS
522
+ # ============================================================================
523
+
524
+ def calculate_asset_turnover(revenue: float, total_assets: float) -> Optional[float]:
525
+ """
526
+ Asset Turnover
527
+ Formula: Asset Turnover = Revenue / Total Assets
528
+
529
+ Explanation: How efficiently assets generate revenue
530
+ """
531
+ if type(revenue) in [int, float] and type(total_assets) in [int, float]:
532
+ if total_assets and total_assets > 0:
533
+ return revenue / total_assets
534
+ return None
535
+
536
+
537
+ def calculate_inventory_turnover(cogs: float, avg_inventory: float) -> Optional[float]:
538
+ """
539
+ Inventory Turnover
540
+ Formula: Inventory Turnover = COGS / Average Inventory
541
+
542
+ Explanation: How quickly inventory is sold
543
+ """
544
+ if type(cogs) in [int, float] and type(avg_inventory) in [int, float]:
545
+ if avg_inventory and avg_inventory > 0:
546
+ return cogs / avg_inventory
547
+ return None
548
+
549
+
550
+ def calculate_receivables_turnover(revenue: float, avg_receivables: float) -> Optional[float]:
551
+ """
552
+ Receivables Turnover
553
+ Formula: Receivables Turnover = Revenue / Average Receivables
554
+
555
+ Explanation: How quickly receivables are collected
556
+ """
557
+ if type(revenue) in [int, float] and type(avg_receivables) in [int, float]:
558
+ if avg_receivables and avg_receivables > 0:
559
+ return revenue / avg_receivables
560
+ return None
561
+
562
+
563
+ # ============================================================================
564
+ # 10. WORKING CAPITAL METRICS
565
+ # ============================================================================
566
+
567
+ def calculate_working_capital(current_assets: float, current_liabilities: float) -> Optional[float]:
568
+ """
569
+ Working Capital
570
+ Formula: Working Capital = Current Assets - Current Liabilities
571
+
572
+ Explanation: Short-term operational liquidity
573
+ """
574
+ if type(current_assets) in [int, float] and type(current_liabilities) in [int, float]:
575
+ return current_assets - current_liabilities
576
+ return None
577
+
578
+
579
+ def calculate_working_capital_ratio(working_capital: float, revenue: float) -> Optional[float]:
580
+ """
581
+ Working Capital to Revenue
582
+ Formula: WC Ratio = Working Capital / Revenue
583
+
584
+ Explanation: Capital tied up in operations
585
+ """
586
+ if type(working_capital) in [int, float] and type(revenue) in [int, float]:
587
+ if revenue and revenue > 0:
588
+ return (working_capital / revenue) * 100
589
+ return None
590
+
591
+
592
+ # ============================================================================
593
+ # HELPER FUNCTIONS
594
+ # ============================================================================
595
+
596
+ def safe_divide(numerator: float, denominator: float,
597
+ default: Optional[float] = None) -> Optional[float]:
598
+ """Safely divide two numbers, returning default if denominator is 0"""
599
+ if type(numerator) in [int, float] and type(denominator) in [int, float]:
600
+ if denominator and denominator != 0:
601
+ return numerator / denominator
602
+ return default
603
+
604
+
605
+ def calculate_average(value1: float, value2: float) -> Optional[float]:
606
+ """Calculate average of two values"""
607
+ if type(value1) in [int, float] and type(value2) in [int, float]:
608
+ return (value1 + value2) / 2
609
+ return None
610
+
611
+
612
+ def annualize_quarterly(quarterly_value: float) -> Optional[float]:
613
+ """Convert quarterly value to annual (TTM)"""
614
+ if type(quarterly_value) in [int, float]:
615
+ return quarterly_value * 4
616
+ return None
src/fundamental_analysis/sector_analyzer.py ADDED
@@ -0,0 +1,415 @@
1
+ """
2
+ Sector Analyzer
3
+ Get all important stocks for a sector and perform peer comparison analysis
4
+ """
5
+
6
+ import yfinance as yf
7
+ import pandas as pd
8
+ import numpy as np
9
+ from typing import List, Dict, Optional, Tuple
10
+ from fundamental_analysis.calculator import calculate_metrics_for_ticker
11
+ import warnings
12
+ warnings.filterwarnings('ignore')
13
+
14
+
15
+ # Predefined sector/industry stock lists
16
+ # These can be expanded or replaced with dynamic fetching from financial APIs
17
+ SECTOR_STOCKS = {
18
+ 'Technology': {
19
+ 'Mega Cap': ['AAPL', 'MSFT', 'GOOGL', 'META', 'NVDA', 'TSLA', 'AVGO', 'ORCL', 'ADBE', 'CRM'],
20
+ 'Large Cap': ['AMD', 'INTC', 'QCOM', 'TXN', 'AMAT', 'ADI', 'LRCX', 'KLAC', 'SNPS', 'CDNS'],
21
+ 'Mid Cap': ['SNOW', 'CRWD', 'NET', 'DDOG', 'ZS', 'OKTA', 'MDB', 'TEAM', 'WDAY', 'PANW']
22
+ },
23
+ 'Financial': {
24
+ 'Banks': ['JPM', 'BAC', 'WFC', 'C', 'GS', 'MS', 'USB', 'PNC', 'TFC', 'SCHW'],
25
+ 'Insurance': ['BRK-B', 'UNH', 'PGR', 'MET', 'PRU', 'AIG', 'ALL', 'TRV', 'AXP', 'CB'],
26
+ 'Asset Management': ['BLK', 'BX', 'KKR', 'APO', 'TROW', 'IVZ', 'BEN', 'AMG']
27
+ },
28
+ 'Healthcare': {
29
+ 'Pharma': ['JNJ', 'PFE', 'ABBV', 'MRK', 'LLY', 'TMO', 'ABT', 'BMY', 'AMGN', 'GILD'],
30
+ 'Biotech': ['VRTX', 'REGN', 'BIIB', 'ILMN', 'MRNA', 'BNTX', 'ALNY', 'SGEN', 'INCY', 'EXAS'],
31
+ 'Medical Devices': ['MDT', 'DHR', 'SYK', 'BSX', 'EW', 'ZBH', 'BAX', 'IDXX', 'ALGN', 'HOLX']
32
+ },
33
+ 'Consumer': {
34
+ 'Retail': ['AMZN', 'WMT', 'HD', 'LOW', 'TGT', 'COST', 'TJX', 'ROST', 'DG', 'DLTR'],
35
+ 'Consumer Goods': ['PG', 'KO', 'PEP', 'PM', 'MDLZ', 'CL', 'EL', 'KMB', 'GIS', 'K'],
36
+ 'Restaurants': ['MCD', 'SBUX', 'CMG', 'YUM', 'QSR', 'DPZ', 'DRI', 'TXRH', 'WING', 'SHAK']
37
+ },
38
+ 'Industrial': {
39
+ 'Aerospace': ['BA', 'RTX', 'LMT', 'NOC', 'GD', 'HON', 'GE', 'TDG', 'HWM', 'LDOS'],
40
+ 'Manufacturing': ['CAT', 'DE', 'ETN', 'EMR', 'ITW', 'ROK', 'PH', 'IR', 'AME', 'XYL'],
41
+ 'Transportation': ['UPS', 'FDX', 'UNP', 'NSC', 'CSX', 'DAL', 'UAL', 'AAL', 'LUV', 'JBHT']
42
+ },
43
+ 'Energy': {
44
+ 'Oil & Gas': ['XOM', 'CVX', 'COP', 'EOG', 'SLB', 'MPC', 'PSX', 'VLO', 'OXY', 'HES'],
45
+ 'Utilities': ['NEE', 'DUK', 'SO', 'D', 'AEP', 'EXC', 'SRE', 'XEL', 'WEC', 'ES']
46
+ },
47
+ 'Materials': ['LIN', 'APD', 'SHW', 'ECL', 'NEM', 'FCX', 'NUE', 'VMC', 'MLM', 'DOW'],
48
+ 'Real Estate': ['PLD', 'AMT', 'CCI', 'EQIX', 'PSA', 'DLR', 'WELL', 'AVB', 'EQR', 'VICI'],
49
+ 'Communication': ['GOOGL', 'META', 'NFLX', 'DIS', 'CMCSA', 'T', 'VZ', 'TMUS', 'CHTR', 'PARA']
50
+ }
51
+
52
+
53
+ class SectorAnalyzer:
54
+ """Analyze stocks within a sector for peer comparison"""
55
+
56
+ def __init__(self, sector: str, subsector: Optional[str] = None):
57
+ """
58
+ Initialize sector analyzer
59
+
60
+ Args:
61
+ sector: Main sector name (e.g., 'Technology', 'Healthcare')
62
+ subsector: Optional subsector/industry (e.g., 'Banks', 'Pharma')
63
+ """
64
+ self.sector = sector
65
+ self.subsector = subsector
66
+ self.tickers = []
67
+ self.metrics_data = {}
68
+
69
+ def get_sector_tickers(self) -> List[str]:
70
+ """
71
+ Get list of tickers for the sector/subsector
72
+
73
+ Returns:
74
+ List of ticker symbols
75
+ """
76
+ if self.sector not in SECTOR_STOCKS:
77
+ print(f"Warning: Sector '{self.sector}' not found in predefined list")
78
+ print(f"Available sectors: {list(SECTOR_STOCKS.keys())}")
79
+ return []
80
+
81
+ sector_data = SECTOR_STOCKS[self.sector]
82
+
83
+ # If sector has subsectors (nested dict)
84
+ if isinstance(sector_data, dict) and any(isinstance(v, list) for v in sector_data.values()):
85
+ if self.subsector:
86
+ if self.subsector in sector_data:
87
+ self.tickers = sector_data[self.subsector]
88
+ else:
89
+ print(f"Warning: Subsector '{self.subsector}' not found")
90
+ print(f"Available subsectors: {list(sector_data.keys())}")
91
+ return []
92
+ else:
93
+ # Flatten all subsectors
94
+ self.tickers = [ticker for subsector_list in sector_data.values()
95
+ for ticker in subsector_list]
96
+ else:
97
+ # Direct list of tickers
98
+ self.tickers = sector_data
99
+
100
+ print(f"Found {len(self.tickers)} tickers for {self.sector}" +
101
+ (f" > {self.subsector}" if self.subsector else ""))
102
+
103
+ return self.tickers
104
+
105
+ def calculate_sector_metrics(self, tickers: Optional[List[str]] = None) -> pd.DataFrame:
106
+ """
107
+ Calculate metrics for all stocks in the sector
108
+
109
+ Args:
110
+ tickers: Optional custom list of tickers (uses sector tickers if None)
111
+
112
+ Returns:
113
+ DataFrame with all stocks and their key metrics
114
+ """
115
+ if tickers is None:
116
+ tickers = self.tickers if self.tickers else self.get_sector_tickers()
117
+
118
+ if not tickers:
119
+ print("No tickers to analyze")
120
+ return pd.DataFrame()
121
+
122
+ print(f"\nCalculating metrics for {len(tickers)} stocks...")
123
+ print("=" * 80)
124
+
125
+ results = []
126
+ failed_tickers = []
127
+
128
+ for i, ticker in enumerate(tickers, 1):
129
+ print(f"\n[{i}/{len(tickers)}] Processing {ticker}...")
130
+
131
+ try:
132
+ metrics_df, summary = calculate_metrics_for_ticker(ticker)
133
+
134
+ if metrics_df.empty:
135
+ print(f"✗ Failed to get data for {ticker}")
136
+ failed_tickers.append(ticker)
137
+ continue
138
+
139
+ # Extract key metrics for comparison
140
+ key_metrics = self._extract_key_metrics(ticker, metrics_df)
141
+ results.append(key_metrics)
142
+
143
+ # Store full metrics for later reference
144
+ self.metrics_data[ticker] = metrics_df
145
+
146
+ except Exception as e:
147
+ print(f"✗ Error processing {ticker}: {str(e)}")
148
+ failed_tickers.append(ticker)
149
+ continue
150
+
151
+ if not results:
152
+ print("\n✗ No data collected for any tickers")
153
+ return pd.DataFrame()
154
+
155
+ # Create comparison DataFrame
156
+ comparison_df = pd.DataFrame(results)
157
+ comparison_df = comparison_df.set_index('Ticker')
158
+
159
+ print("\n" + "=" * 80)
160
+ print(f"✓ Successfully processed {len(results)}/{len(tickers)} stocks")
161
+ if failed_tickers:
162
+ print(f"✗ Failed: {', '.join(failed_tickers)}")
163
+
164
+ return comparison_df
165
+
166
+ def _extract_key_metrics(self, ticker: str, metrics_df: pd.DataFrame) -> Dict:
167
+ """Extract key metrics for peer comparison"""
168
+
169
+ def get_metric_value(metric_name: str) -> Optional[float]:
170
+ """Helper to safely extract metric value"""
171
+ row = metrics_df[metrics_df['Metric'] == metric_name]
172
+ if not row.empty and row.iloc[0]['Status'] == 'Available':
173
+ return row.iloc[0]['Value']
174
+ return None
175
+
176
+ key_metrics = {
177
+ 'Ticker': ticker,
178
+
179
+ # Valuation
180
+ 'Market_Cap': get_metric_value('Market Capitalization'),
181
+ 'PE_Ratio': get_metric_value('P/E Ratio (TTM)'),
182
+ 'PEG_Ratio': get_metric_value('PEG Ratio'),
183
+ 'EV_EBITDA': get_metric_value('EV/EBITDA'),
184
+ 'Price_FCF': get_metric_value('Price / FCF'),
185
+ 'FCF_Yield_%': get_metric_value('FCF Yield (Enterprise) %'),
186
+ 'Price_Book': get_metric_value('Price / Book'),
187
+
188
+ # Profitability
189
+ 'Gross_Margin_%': get_metric_value('Gross Margin %'),
190
+ 'EBITDA_Margin_%': get_metric_value('EBITDA Margin %'),
191
+ 'Net_Margin_%': get_metric_value('Net Margin %'),
192
+
193
+ # Cash Flow
194
+ 'Free_Cash_Flow': get_metric_value('Free Cash Flow'),
195
+ 'Cash_Conversion': get_metric_value('Cash Conversion Ratio'),
196
+
197
+ # Leverage
198
+ 'Net_Debt_EBITDA': get_metric_value('Net Debt / EBITDA'),
199
+ 'Debt_Equity': get_metric_value('Debt / Equity'),
200
+ 'Current_Ratio': get_metric_value('Current Ratio'),
201
+
202
+ # Returns
203
+ 'ROE_%': get_metric_value('Return on Equity (ROE) %'),
204
+ 'ROA_%': get_metric_value('Return on Assets (ROA) %'),
205
+ 'ROIC_%': get_metric_value('Return on Invested Capital (ROIC) %'),
206
+
207
+ # Growth
208
+ 'Revenue_Growth_%': get_metric_value('Revenue Growth (YoY) %'),
209
+ 'EPS_Growth_%': get_metric_value('EPS Growth (YoY) %'),
210
+
211
+ # Capital Allocation
212
+ 'Payout_Ratio_%': get_metric_value('Payout Ratio %'),
213
+ 'Total_Payout_%': get_metric_value('Total Payout Ratio %'),
214
+ }
215
+
216
+ return key_metrics
217
+
218
+ def get_peer_statistics(self, comparison_df: pd.DataFrame) -> pd.DataFrame:
219
+ """
220
+ Calculate sector statistics (median, mean, percentiles)
221
+
222
+ Args:
223
+ comparison_df: DataFrame from calculate_sector_metrics
224
+
225
+ Returns:
226
+ DataFrame with sector statistics
227
+ """
228
+ if comparison_df.empty:
229
+ return pd.DataFrame()
230
+
231
+ stats_df = pd.DataFrame({
232
+ 'Median': comparison_df.median(),
233
+ 'Mean': comparison_df.mean(),
234
+ 'Std_Dev': comparison_df.std(),
235
+ 'Min': comparison_df.min(),
236
+ 'Q1': comparison_df.quantile(0.25),
237
+ 'Q3': comparison_df.quantile(0.75),
238
+ 'Max': comparison_df.max(),
239
+ 'Count': comparison_df.count()
240
+ })
241
+
242
+ return stats_df
243
+
244
+ def compare_stock_to_peers(self, ticker: str, comparison_df: pd.DataFrame) -> pd.DataFrame:
245
+ """
246
+ Compare a specific stock to sector peers
247
+
248
+ Args:
249
+ ticker: Stock to compare
250
+ comparison_df: Sector comparison data
251
+
252
+ Returns:
253
+ DataFrame showing stock vs sector statistics
254
+ """
255
+ if ticker not in comparison_df.index:
256
+ print(f"Ticker {ticker} not found in comparison data")
257
+ return pd.DataFrame()
258
+
259
+ stock_data = comparison_df.loc[ticker]
260
+ sector_stats = self.get_peer_statistics(comparison_df)
261
+
262
+ comparison = pd.DataFrame({
263
+ 'Stock_Value': stock_data,
264
+ 'Sector_Median': sector_stats['Median'],
265
+ 'Sector_Mean': sector_stats['Mean'],
266
+ 'Percentile_Rank': comparison_df.rank(pct=True).loc[ticker] * 100,
267
+ 'vs_Median': ((stock_data - sector_stats['Median']) / sector_stats['Median'] * 100)
268
+ })
269
+
270
+ return comparison
271
+
272
+ def rank_stocks(self, comparison_df: pd.DataFrame,
273
+ metrics: Optional[List[str]] = None) -> pd.DataFrame:
274
+ """
275
+ Rank stocks based on key metrics
276
+
277
+ Args:
278
+ comparison_df: Sector comparison data
279
+ metrics: List of metrics to rank by (None = use default key metrics)
280
+
281
+ Returns:
282
+ DataFrame with rankings
283
+ """
284
+ if comparison_df.empty:
285
+ return pd.DataFrame()
286
+
287
+ # Default key metrics for ranking (higher is better for most)
288
+ if metrics is None:
289
+ metrics = [
290
+ 'FCF_Yield_%', # Higher is better
291
+ 'ROIC_%', # Higher is better
292
+ 'ROE_%', # Higher is better
293
+ 'Revenue_Growth_%', # Higher is better
294
+ 'EPS_Growth_%', # Higher is better
295
+ ]
296
+
297
+ # Reverse ranking for these (lower is better)
298
+ reverse_metrics = [
299
+ 'PE_Ratio',
300
+ 'PEG_Ratio',
301
+ 'EV_EBITDA',
302
+ 'Net_Debt_EBITDA',
303
+ 'Debt_Equity'
304
+ ]
305
+
306
+ # Calculate composite score
307
+ scores = pd.DataFrame(index=comparison_df.index)
308
+
309
+ for metric in metrics:
310
+ if metric in comparison_df.columns:
311
+ # Normalize and rank (higher is better)
312
+ scores[f'{metric}_rank'] = comparison_df[metric].rank(pct=True, na_option='keep')
313
+
314
+ # Calculate average rank across all metrics
315
+ scores['Composite_Score'] = scores.mean(axis=1)
316
+ scores['Rank'] = scores['Composite_Score'].rank(ascending=False, method='min')
317
+
318
+ # Add key metrics for context
319
+ result = scores[['Composite_Score', 'Rank']].copy()
320
+ for metric in metrics:
321
+ if metric in comparison_df.columns:
322
+ result[metric] = comparison_df[metric]
323
+
324
+ return result.sort_values('Rank')
325
+
326
+
327
+ def analyze_sector(sector: str, subsector: Optional[str] = None,
328
+ custom_tickers: Optional[List[str]] = None) -> Tuple[pd.DataFrame, pd.DataFrame, SectorAnalyzer]:
329
+ """
330
+ Main function to analyze a sector
331
+
332
+ Args:
333
+ sector: Sector name
334
+ subsector: Optional subsector name
335
+ custom_tickers: Optional custom list of tickers
336
+
337
+ Returns:
338
+ Tuple of (comparison_df, sector_stats, analyzer_object)
339
+ """
340
+ analyzer = SectorAnalyzer(sector, subsector)
341
+
342
+ if custom_tickers:
343
+ comparison_df = analyzer.calculate_sector_metrics(custom_tickers)
344
+ else:
345
+ analyzer.get_sector_tickers()
346
+ comparison_df = analyzer.calculate_sector_metrics()
347
+
348
+ sector_stats = analyzer.get_peer_statistics(comparison_df)
349
+
350
+ return comparison_df, sector_stats, analyzer
351
+
352
+
353
+ def list_available_sectors() -> None:
354
+ """Print all available sectors and subsectors"""
355
+ print("\nAVAILABLE SECTORS:")
356
+ print("=" * 80)
357
+
358
+ for sector, data in SECTOR_STOCKS.items():
359
+ if isinstance(data, dict) and any(isinstance(v, list) for v in data.values()):
360
+ print(f"\n{sector}:")
361
+ for subsector, tickers in data.items():
362
+ print(f" - {subsector} ({len(tickers)} stocks)")
363
+ else:
364
+ print(f"\n{sector}: {len(data)} stocks")
365
+
366
+
367
+ if __name__ == "__main__":
368
+ # Test with a sample sector
369
+ print("SECTOR ANALYZER TEST")
370
+ print("=" * 80)
371
+
372
+ # Show available sectors
373
+ list_available_sectors()
374
+
375
+ # Test with a small subset of Technology stocks
376
+ print("\n\nTesting with Technology > Mega Cap (first 3 stocks)...")
377
+ print("=" * 80)
378
+
379
+ test_tickers = ['AAPL', 'MSFT', 'GOOGL']
380
+ comparison_df, sector_stats, analyzer = analyze_sector('Technology', custom_tickers=test_tickers)
381
+
382
+ if not comparison_df.empty:
383
+ print("\n" + "=" * 80)
384
+ print("COMPARISON DATA")
385
+ print("=" * 80)
386
+
387
+ # Show key valuation metrics
388
+ valuation_cols = ['Market_Cap', 'PE_Ratio', 'PEG_Ratio', 'FCF_Yield_%', 'ROIC_%']
389
+ print("\nValuation Metrics:")
390
+ print(comparison_df[valuation_cols].to_string())
391
+
392
+ # Show sector statistics
393
+ print("\n" + "=" * 80)
394
+ print("SECTOR STATISTICS")
395
+ print("=" * 80)
396
+ print(sector_stats.loc[valuation_cols].to_string())
397
+
398
+ # Rank stocks
399
+ print("\n" + "=" * 80)
400
+ print("STOCK RANKINGS")
401
+ print("=" * 80)
402
+ rankings = analyzer.rank_stocks(comparison_df)
403
+ print(rankings.to_string())
404
+
405
+ # Compare AAPL to peers
406
+ print("\n" + "=" * 80)
407
+ print("AAPL vs PEERS")
408
+ print("=" * 80)
409
+ aapl_comparison = analyzer.compare_stock_to_peers('AAPL', comparison_df)
410
+ print(aapl_comparison.loc[valuation_cols].to_string())
411
+
412
+ # Save results
413
+ comparison_df.to_csv('sector_comparison.csv')
414
+ sector_stats.to_csv('sector_statistics.csv')
415
+ print("\n✓ Results saved to sector_comparison.csv and sector_statistics.csv")
src/fundamental_analysis/test_analyzer.py ADDED
@@ -0,0 +1,44 @@
1
+ """
2
+ Quick test script for the fundamental analyzer
3
+ """
4
+
5
+ import sys
6
+ from fundamental_analysis.decision_maker import evaluate_stock
7
+
8
+ def test_basic_analysis():
9
+ """Test basic analysis with a well-known ticker"""
10
+ print("=" * 80)
11
+ print("Testing Fundamental Analyzer with AAPL")
12
+ print("=" * 80)
13
+ print()
14
+
15
+ try:
16
+ # Test with Apple, comparing against its sector peers
17
+ compare_sector = True  # use the compare_to_sector flag exposed by evaluate_stock
18
+
19
+ print("Running analysis for AAPL with peers: MSFT, GOOGL, META")
20
+ print("-" * 80)
21
+ print()
22
+
23
+ evaluate_stock("AAPL", compare_to_sector=compare_sector)
24
+
25
+ print()
26
+ print("=" * 80)
27
+ print("✓ Test completed successfully!")
28
+ print("=" * 80)
29
+
30
+ except Exception as e:
31
+ print()
32
+ print("=" * 80)
33
+ print(f"✗ Test failed with error: {type(e).__name__}")
34
+ print(f"Error message: {str(e)}")
35
+ print("=" * 80)
36
+ import traceback
37
+ traceback.print_exc()
38
+ return False
39
+
40
+ return True
41
+
42
+ if __name__ == "__main__":
43
+ success = test_basic_analysis()
44
+ sys.exit(0 if success else 1)
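
To run the test directly (path assumption: executed from the repository root with src/ on PYTHONPATH):

    PYTHONPATH=src python src/fundamental_analysis/test_analyzer.py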
src/fundamental_analysis/valuation_engine.py ADDED
@@ -0,0 +1,330 @@
1
+ """
2
+ Valuation engine for stock analysis.
3
+ Implements DCF, comparable multiples, and scenario analysis.
4
+ """
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from typing import Dict, List, Optional, Tuple
9
+
10
+
11
+ class ValuationEngine:
12
+ """Performs stock valuation using multiple methodologies"""
13
+
14
+ def __init__(self, financial_data: Dict, analysis_results: Dict):
15
+ """
16
+ Initialize valuation engine
17
+
18
+ Args:
19
+ financial_data: Complete dataset from FinancialDataFetcher
20
+ analysis_results: Results from FinancialAnalyzer
21
+ """
22
+ self.ticker = financial_data.get('ticker')
23
+ self.metrics = financial_data.get('metrics', {})
24
+ self.statements = financial_data.get('financial_statements', {})
25
+ self.sector_metrics = financial_data.get('sector_metrics', {})
26
+ self.analysis = analysis_results
27
+
28
+ def calculate_intrinsic_value_dcf(self,
29
+ growth_rate: float = 0.10,
30
+ terminal_growth: float = 0.02,
31
+ discount_rate: float = 0.10,
32
+ years: int = 5) -> Dict:
33
+ """
34
+ Calculate intrinsic value using DCF method
35
+
36
+ Args:
37
+ growth_rate: Expected FCF growth rate
38
+ terminal_growth: Perpetual growth rate
39
+ discount_rate: WACC / required return
40
+ years: Forecast period
41
+
42
+ Returns:
43
+ DCF valuation results
44
+ """
45
+ results = {
46
+ 'method': 'DCF',
47
+ 'assumptions': {
48
+ 'growth_rate': growth_rate,
49
+ 'terminal_growth': terminal_growth,
50
+ 'discount_rate': discount_rate,
51
+ 'forecast_years': years
52
+ }
53
+ }
54
+
55
+ try:
56
+ # Get current FCF
57
+ current_fcf = self.metrics.get('free_cash_flow', 0)
58
+
59
+ if current_fcf <= 0:
60
+ results['error'] = 'Negative or zero FCF - DCF not applicable'
61
+ return results
62
+
63
+ # Project FCF
64
+ projected_fcf = []
65
+ pv_fcf = []
66
+
67
+ for year in range(1, years + 1):
68
+ fcf = current_fcf * ((1 + growth_rate) ** year)
69
+ pv = fcf / ((1 + discount_rate) ** year)
70
+ projected_fcf.append(fcf)
71
+ pv_fcf.append(pv)
72
+
73
+ # Terminal value
74
+ terminal_fcf = projected_fcf[-1] * (1 + terminal_growth)
75
+ terminal_value = terminal_fcf / (discount_rate - terminal_growth)
76
+ pv_terminal = terminal_value / ((1 + discount_rate) ** years)
77
+
78
+ # Enterprise value
79
+ enterprise_value = sum(pv_fcf) + pv_terminal
80
+
81
+ # Equity value
82
+ net_debt = self.metrics.get('total_debt', 0) - self.metrics.get('total_cash', 0)
83
+ equity_value = enterprise_value - net_debt
84
+
85
+ # Per share value
86
+ shares_outstanding = self.metrics.get('shares_outstanding', 1)
87
+ fair_value_per_share = equity_value / shares_outstanding if shares_outstanding > 0 else 0
88
+
89
+ current_price = self.metrics.get('current_price', 0)
90
+ upside = ((fair_value_per_share - current_price) / current_price) * 100 if current_price > 0 else 0
91
+
92
+ results.update({
93
+ 'current_fcf': current_fcf,
94
+ 'pv_cash_flows': sum(pv_fcf),
95
+ 'pv_terminal_value': pv_terminal,
96
+ 'enterprise_value': enterprise_value,
97
+ 'equity_value': equity_value,
98
+ 'fair_value_per_share': fair_value_per_share,
99
+ 'current_price': current_price,
100
+ 'upside_percent': upside,
101
+ 'recommendation': 'BUY' if upside > 15 else 'HOLD' if upside > -10 else 'SELL'
102
+ })
103
+
104
+ except Exception as e:
105
+ results['error'] = f'DCF calculation error: {str(e)}'
106
+
107
+ return results
108
+
109
+ def calculate_relative_valuation(self) -> Dict:
110
+ """
111
+ Calculate valuation using comparable multiples
112
+
113
+ Returns:
114
+ Relative valuation results
115
+ """
116
+ results = {
117
+ 'method': 'Comparable Multiples'
118
+ }
119
+
120
+ current_price = self.metrics.get('current_price', 0)
121
+
122
+ # P/E based valuation
123
+ if self.metrics.get('trailing_pe') and self.metrics.get('eps_trailing'):
124
+ sector_pe = self.sector_metrics.get('trailing_pe_median')
125
+
126
+ if sector_pe:
127
+ eps = self.metrics.get('eps_trailing')
128
+ fair_value_pe = eps * sector_pe
129
+ pe_upside = ((fair_value_pe - current_price) / current_price) * 100 if current_price > 0 else 0
130
+
131
+ results['pe_valuation'] = {
132
+ 'company_pe': self.metrics.get('trailing_pe'),
133
+ 'sector_pe_median': sector_pe,
134
+ 'eps': eps,
135
+ 'fair_value': fair_value_pe,
136
+ 'current_price': current_price,
137
+ 'upside_percent': pe_upside
138
+ }
139
+
140
+ # PEG based valuation
141
+ if self.metrics.get('peg_ratio') and self.metrics.get('eps_forward'):
142
+ sector_peg = self.sector_metrics.get('peg_ratio_median')
143
+ earnings_growth = self.analysis.get('growth_analysis', {}).get('earnings_growth_ttm', 0)
144
+
145
+ if sector_peg and earnings_growth:
146
+ eps_forward = self.metrics.get('eps_forward')
147
+ fair_pe = sector_peg * (earnings_growth * 100)
148
+ fair_value_peg = eps_forward * fair_pe
149
+ peg_upside = ((fair_value_peg - current_price) / current_price) * 100 if current_price > 0 else 0
150
+
151
+ results['peg_valuation'] = {
152
+ 'company_peg': self.metrics.get('peg_ratio'),
153
+ 'sector_peg_median': sector_peg,
154
+ 'earnings_growth': earnings_growth,
155
+ 'fair_pe': fair_pe,
156
+ 'fair_value': fair_value_peg,
157
+ 'current_price': current_price,
158
+ 'upside_percent': peg_upside
159
+ }
160
+
161
+ # P/B based valuation
162
+ if self.metrics.get('price_to_book') and self.metrics.get('book_value_per_share'):
163
+ sector_pb = self.sector_metrics.get('price_to_book_median')
164
+
165
+ if sector_pb:
166
+ book_value = self.metrics.get('book_value_per_share')
167
+ fair_value_pb = book_value * sector_pb
168
+ pb_upside = ((fair_value_pb - current_price) / current_price) * 100 if current_price > 0 else 0
169
+
170
+ results['pb_valuation'] = {
171
+ 'company_pb': self.metrics.get('price_to_book'),
172
+ 'sector_pb_median': sector_pb,
173
+ 'book_value_per_share': book_value,
174
+ 'fair_value': fair_value_pb,
175
+ 'current_price': current_price,
176
+ 'upside_percent': pb_upside
177
+ }
178
+
179
+ # Calculate average upside from available methods
180
+ upsides = []
181
+ if 'pe_valuation' in results:
182
+ upsides.append(results['pe_valuation']['upside_percent'])
183
+ if 'peg_valuation' in results:
184
+ upsides.append(results['peg_valuation']['upside_percent'])
185
+ if 'pb_valuation' in results:
186
+ upsides.append(results['pb_valuation']['upside_percent'])
187
+
188
+ if upsides:
189
+ avg_upside = sum(upsides) / len(upsides)
190
+ results['average_upside'] = avg_upside
191
+ results['recommendation'] = 'BUY' if avg_upside > 15 else 'HOLD' if avg_upside > -10 else 'SELL'
192
+
193
+ return results
194
+
195
+ def scenario_analysis(self) -> Dict:
196
+ """
197
+ Perform bull/base/bear scenario valuation
198
+
199
+ Returns:
200
+ Scenario analysis results
201
+ """
202
+ # Base case: use historical/current growth rates
203
+ base_growth = self.analysis.get('growth_analysis', {}).get('revenue_growth_ttm', 0.08)
204
+ base_growth = max(0.05, min(base_growth, 0.20)) # Cap between 5% and 20%
205
+
206
+ scenarios = {}
207
+
208
+ # Bear case: 20% lower growth, higher risk (lower valuation)
209
+ bear_growth = base_growth * 0.8
210
+ scenarios['bear'] = self.calculate_intrinsic_value_dcf(
211
+ growth_rate=bear_growth,
212
+ discount_rate=0.12 # Higher discount rate = higher risk = lower valuation
213
+ )
214
+
215
+ # Base case
216
+ scenarios['base'] = self.calculate_intrinsic_value_dcf(
217
+ growth_rate=base_growth,
218
+ discount_rate=0.10
219
+ )
220
+
221
+ # Bull case: 20% higher growth, lower risk (higher valuation)
222
+ bull_growth = base_growth * 1.2
223
+ scenarios['bull'] = self.calculate_intrinsic_value_dcf(
224
+ growth_rate=bull_growth,
225
+ discount_rate=0.08 # Lower discount rate = lower risk = higher valuation
226
+ )
227
+
228
+ # Summary
229
+ results = {
230
+ 'scenarios': scenarios,
231
+ 'current_price': self.metrics.get('current_price', 0)
232
+ }
233
+
234
+ # Calculate price ranges
235
+ bear_price = scenarios['bear'].get('fair_value_per_share', 0)
236
+ base_price = scenarios['base'].get('fair_value_per_share', 0)
237
+ bull_price = scenarios['bull'].get('fair_value_per_share', 0)
238
+
239
+ results['price_range'] = {
240
+ 'bear': bear_price,
241
+ 'base': base_price,
242
+ 'bull': bull_price,
243
+ 'range': bull_price - bear_price
244
+ }
245
+
246
+ # Risk/reward assessment
247
+ current_price = results['current_price']
248
+ if current_price > 0:
249
+ downside = ((bear_price - current_price) / current_price) * 100
250
+ upside = ((bull_price - current_price) / current_price) * 100
251
+
252
+ results['risk_reward'] = {
253
+ 'downside_percent': downside,
254
+ 'upside_percent': upside,
255
+ 'risk_reward_ratio': abs(upside / downside) if downside != 0 else 0,
256
+ 'assessment': 'Favorable' if upside > abs(downside) else 'Unfavorable'
257
+ }
258
+
259
+ return results
260
+
261
+ def calculate_margin_of_safety(self) -> Dict:
262
+ """
263
+ Calculate margin of safety
264
+
265
+ Returns:
266
+ Margin of safety metrics
267
+ """
268
+ results = {}
269
+
270
+ current_price = self.metrics.get('current_price', 0)
271
+ if current_price <= 0:
272
+ return {'error': 'Invalid current price'}
273
+
274
+ # Based on DCF
275
+ dcf_result = self.calculate_intrinsic_value_dcf()
276
+ if 'fair_value_per_share' in dcf_result:
277
+ intrinsic_value = dcf_result['fair_value_per_share']
278
+ margin = ((intrinsic_value - current_price) / intrinsic_value) * 100 if intrinsic_value > 0 else 0
279
+
280
+ results['dcf_margin_of_safety'] = {
281
+ 'intrinsic_value': intrinsic_value,
282
+ 'current_price': current_price,
283
+ 'margin_percent': margin,
284
+ 'assessment': self._assess_margin_of_safety(margin)
285
+ }
286
+
287
+ # Based on book value
288
+ book_value = self.metrics.get('book_value_per_share', 0)
289
+ if book_value > 0:
290
+ margin = ((book_value - current_price) / book_value) * 100
291
+ results['book_value_margin'] = {
292
+ 'book_value': book_value,
293
+ 'current_price': current_price,
294
+ 'margin_percent': margin
295
+ }
296
+
297
+ return results
298
+
299
+ def _assess_margin_of_safety(self, margin: float) -> str:
300
+ """Assess margin of safety"""
301
+ if margin >= 30:
302
+ return "Excellent - Strong margin of safety"
303
+ elif margin >= 20:
304
+ return "Good - Adequate margin of safety"
305
+ elif margin >= 10:
306
+ return "Fair - Minimal margin of safety"
307
+ elif margin >= 0:
308
+ return "Weak - Little to no margin of safety"
309
+ else:
310
+ return "Overvalued - Negative margin of safety"
311
+
312
+ def generate_valuation_report(self) -> Dict:
313
+ """
314
+ Generate comprehensive valuation report
315
+
316
+ Returns:
317
+ Complete valuation analysis
318
+ """
319
+ return {
320
+ 'ticker': self.ticker,
321
+ 'current_price': self.metrics.get('current_price', 0),
322
+ 'dcf_valuation': self.calculate_intrinsic_value_dcf(),
323
+ 'relative_valuation': self.calculate_relative_valuation(),
324
+ 'scenario_analysis': self.scenario_analysis(),
325
+ 'margin_of_safety': self.calculate_margin_of_safety()
326
+ }
327
+
328
+
329
+ if __name__ == "__main__":
330
+ print("This module is meant to be imported and used with data from data_fetcher.py and financial_analyzer.py")
src/logs/news_sentiment_2025-11-10.xlsx ADDED
Binary file (21.3 kB)
 
src/logs/news_sentiment_2025-11-15.xlsx ADDED
Binary file (5.01 kB)
 
src/main.py ADDED
@@ -0,0 +1,205 @@
1
+ import os
2
+ import sys
3
+ import threading
4
+ import json
5
+ import logging
6
+ from typing import Optional, Dict, Any
7
+ from fastapi import FastAPI, HTTPException, Header, BackgroundTasks, Depends
8
+ from pydantic import BaseModel
9
+ from datetime import datetime
10
+
11
+ # Add the project root (parent of this file's directory) to the import path
12
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
13
+
14
+ from src.db.local_database import LocalDatabase, DatabaseEntry, DataType
15
+ from run_saturday_analysis import run_saturday_analysis
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+ app = FastAPI(title="Stock Alchemist Signal Generator")
22
+
23
+ # --- Models ---
24
+ class SignalRequest(BaseModel):
25
+ ticker: Optional[str] = None
26
+ prompt_override: Optional[str] = None
27
+
28
+ class SignalResponse(BaseModel):
29
+ status: str
30
+ message: str
31
+ signal_id: Optional[str] = None
32
+
33
+ # --- Dependencies ---
34
+ def verify_api_secret(x_api_secret: str = Header(...)):
35
+ """Verify the API secret header"""
36
+ expected_secret = os.getenv("API_SECRET")
37
+ if not expected_secret:
38
+ # No secret configured: warn and allow the request (dev convenience).
39
+ # In production, prefer failing closed via the commented raise below.
40
+ logger.warning("API_SECRET environment variable not set! Security disabled.")
41
+ return # Allow when the env var is missing; see the commented raise below
42
+ # raise HTTPException(status_code=500, detail="Server misconfiguration: API_SECRET not set")
43
+
44
+ if x_api_secret != expected_secret:
45
+ raise HTTPException(status_code=403, detail="Invalid API Secret")
46
+
47
+ # --- Services ---
48
+
49
+ def generate_signal_logic(ticker: str, prompt_override: Optional[str] = None):
50
+ """
51
+ Core logic to generate a signal using Ollama and save to DB.
52
+ """
53
+ import requests
54
+
55
+ logger.info(f"Generating signal for {ticker}...")
56
+
57
+ # 1. Construct prompt
58
+ # For now the prompt relies on the LLM's internal knowledge of the
59
+ # ticker; a production version would fetch recent news and price data
60
+ # here and include it in the prompt.
61
+
62
+ # Database handle, used below to persist the generated signal
63
+ db = LocalDatabase()
64
+
65
+ # Construct prompt
66
+ prompt = prompt_override or f"Analyze the stock {ticker} and provide a trading signal (BUY/SELL/HOLD) with confidence score and reasoning. Format response as JSON."
67
+
68
+ try:
69
+ # 2. Call Ollama
70
+ # Using the local Ollama instance
71
+ ollama_url = "http://localhost:11434/api/generate"
72
+ payload = {
73
+ "model": "llama3.1",
74
+ "prompt": prompt,
75
+ "stream": False,
76
+ "format": "json" # Llama 3 supports json mode often
77
+ }
78
+
79
+ response = requests.post(ollama_url, json=payload, timeout=120)
80
+ response.raise_for_status()
81
+ result = response.json()
82
+ llm_output = result.get('response', '')
83
+
84
+ logger.info(f"Ollama response for {ticker}: {llm_output[:100]}...")
85
+
86
+ # 3. Parse and Save to DB
87
+ # We'll save the raw LLM output as a signal entry
88
+
89
+ # Try to parse JSON from LLM if possible, otherwise wrap it
90
+ try:
91
+ signal_data = json.loads(llm_output)
92
+ except json.JSONDecodeError:
93
+ signal_data = {"raw_output": llm_output}
94
+
95
+ # Extract signal position if possible
96
+ position = signal_data.get('signal', signal_data.get('recommendation', 'HOLD')).upper()
97
+ if position not in ['BUY', 'SELL', 'HOLD']:
98
+ position = 'HOLD' # Default
99
+
100
+ # Save via LocalDatabase.save_signal, whose signature maps directly
101
+ # onto the signals table:
102
+ # save_signal(self, ticker, calendar_event_keys, news_keys,
103
+ # fundamental_key, signal_position, sentiment)
104
+ # A generic db.save() entry would also work, but save_signal matches
105
+ # the table's columns exactly.
106
+
107
+ # Pass empty key lists for now, since no specific events are linked here.
108
+ is_saved = db.save_signal(
109
+ ticker=ticker,
110
+ calendar_event_keys=[],
111
+ news_keys=[],
112
+ fundamental_key="generated_by_ollama",
113
+ signal_position=position,
114
+ sentiment=signal_data
115
+ )
116
+
117
+ if is_saved:
118
+ logger.info(f"Signal saved for {ticker}")
119
+ else:
120
+ logger.error(f"Failed to save signal for {ticker}")
121
+
122
+ except Exception as e:
123
+ logger.error(f"Error generating signal for {ticker}: {e}")
124
+
125
+ # --- Endpoints ---
126
+
127
+ @app.post("/generate-signal", response_model=SignalResponse, dependencies=[Depends(verify_api_secret)])
128
+ async def generate_signal(request: SignalRequest, background_tasks: BackgroundTasks):
129
+ """
130
+ Trigger signal generation.
131
+ If ticker is provided, generates for that ticker.
132
+ If not provided, the first available ticker in the database is used.
133
+ """
134
+ target_ticker = request.ticker
135
+
136
+ if not target_ticker:
137
+ # No ticker given: pick the first available one from the DB
138
+ try:
139
+ db = LocalDatabase()
140
+ tickers = db.get_all_available_tickers()
141
+ if tickers:
142
+ target_ticker = tickers[0] # Just pick the first one for the demo/daily run
143
+ else:
144
+ raise HTTPException(status_code=404, detail="No tickers available in database")
145
+ except Exception as e:
146
+ raise HTTPException(status_code=500, detail=f"Database error: {e}")
147
+
148
+ # Run in background to avoid timeout
149
+ background_tasks.add_task(generate_signal_logic, target_ticker, request.prompt_override)
150
+
151
+ return SignalResponse(
152
+ status="accepted",
153
+ message=f"Signal generation started for {target_ticker}"
154
+ )
155
+
156
+ @app.post("/saturday-analysis", dependencies=[Depends(verify_api_secret)])
157
+ async def trigger_saturday_analysis(background_tasks: BackgroundTasks):
158
+ """
159
+ Trigger the saturday analysis script.
160
+ """
161
+ background_tasks.add_task(run_saturday_analysis)
162
+ return {"status": "accepted", "message": "Saturday analysis started"}
163
+
164
+ @app.get("/health")
165
+ async def health_check():
166
+ """
167
+ Simple health check.
168
+ Also logs service vitals for monitoring.
169
+ """
170
+ # Verify DB connection
171
+ db_status = "unknown"
172
+ try:
173
+ db = LocalDatabase()
174
+ if db._create_connection():
175
+ db_status = "connected"
176
+ else:
177
+ db_status = "disconnected"
178
+ except Exception as e:
179
+ db_status = f"error: {e}"
180
+
181
+ # Check Ollama
182
+ ollama_status = "unknown"
183
+ try:
184
+ import requests
185
+ resp = requests.get("http://localhost:11434/api/tags", timeout=5)
186
+ if resp.status_code == 200:
187
+ ollama_status = "running"
188
+ else:
189
+ ollama_status = f"error: {resp.status_code}"
190
+ except Exception:
191
+ ollama_status = "down"
192
+
193
+ vitals = {
194
+ "status": "ok",
195
+ "time": datetime.now().isoformat(),
196
+ "database": db_status,
197
+ "ollama": ollama_status
198
+ }
199
+
200
+ logger.info(f"Health Check: {vitals}")
201
+ return vitals
202
+
203
+ if __name__ == "__main__":
204
+ import uvicorn
205
+ uvicorn.run(app, host="0.0.0.0", port=7860)
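
A small client sketch against the endpoints above. FastAPI maps the x_api_secret parameter to an x-api-secret HTTP header; the host and secret values below are placeholders.

    import requests

    BASE = "http://localhost:7860"           # placeholder host
    HEADERS = {"x-api-secret": "change-me"}  # must match the API_SECRET env var

    # Trigger background signal generation for a specific ticker
    resp = requests.post(f"{BASE}/generate-signal", json={"ticker": "AAPL"}, headers=HEADERS)
    print(resp.json())  # {'status': 'accepted', 'message': 'Signal generation started for AAPL', ...}

    # Vitals endpoint (no secret required)
    print(requests.get(f"{BASE}/health").json())
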
src/news_processing_performance.csv ADDED
@@ -0,0 +1,37 @@
1
+ timestamp,ticker,headline,handler1_duration_sec,handler2_duration_sec,handler3_duration_sec,handler4_duration_sec,handler5_duration_sec,marketaux_processing_times_sec
2
+ 2025-12-02 15:22:14,XYZ,"Block Processed 124M Transactions Over Black Friday-Cyber Monday Period, 49.8M Unique Consumers Engaged With Square, Afterpay And Cash App Card",0.0671,0.6457,34.8712,1.2196,0.1178,[]
3
+ 2025-12-02 15:23:27,MDB,"Scotiabank Maintains Sector Perform on MongoDB, Raises Price Target to $415",0.3386,0.7927,18.3751,30.3256,0.0818,"['29.8627', '20.6539', '25.1773']"
4
+ 2025-12-02 15:23:49,GENI,Genius Sports Announces Deal With FanDuel Sports Network To Launch Intelligent Content Platform Across NBA And WNBA,0.0910,0.6553,19.7569,0.3972,0.0897,[]
5
+ 2025-12-02 15:26:40,MSFT,Why Satya Nadella Spends Weekends Studying Startups— And What It Reveals About Microsoft&#39;s AI Fears,0.0723,1.1099,17.3942,28.3587,0.1097,"['22.4378', '27.8175', '17.4317']"
6
+ 2025-12-02 15:27:03,CRDO,"Barclays Maintains Overweight on Credo Technology Group, Raises Price Target to $220",0.0537,0.6266,17.4651,0.5390,0.0806,[]
7
+ 2025-12-02 15:27:48,BA,"Boeing Shares Higher Amid Presentation At UBS Global Industrials and Transportation
8
+ Conference",0.0705,0.6446,17.0340,18.8040,0.1045,['18.3200']
9
+ 2025-12-02 15:29:12,BA,"Boeing CFO Says Boeing Sees 2026 Deliveries Up For 737, 787 Vs 2025; 737 Max 10 Seen Certified In Late 2026; Boeing Sees $2B Free Cash Outflow For 2025, Sees DoJ Penalty To Flip Into 2026",0.0979,0.6760,22.3228,17.7965,0.0923,['17.2851']
10
+ 2025-12-02 15:29:33,LI,"Barclays Maintains Equal-Weight on Li Auto, Lowers Price Target to $18",0.0506,0.8701,17.2470,0.4168,0.0962,[]
11
+ 2025-12-02 15:59:17,FIVE,"Guggenheim Maintains Buy on Five Below, Raises Price Target to $185",0.0560,1.2611,38.0789,1.0606,0.2354,[]
12
+ 2025-12-02 16:03:34,ABT,Top 2 Health Care Stocks You May Want To Dump This Month,0.0619,2.3249,17.1654,18.4556,0.3189,['18.0078']
13
+ 2025-12-02 16:05:20,EXPI,"eXp World&#39;s eXp Realty Unit Expands Operations In Romania, Netherlands",0.0838,0.6778,22.5123,0.5070,0.0741,[]
14
+ 2025-12-02 16:05:46,AMD,"HPE Announces AMD Helios AI Rack Scale Solution, With Scale-Up Networking Built With Broadcom",0.0647,1.8995,21.8066,0.4986,0.3523,[]
15
+ 2025-12-02 16:06:36,EXFY,Expensify Announces Improved Expense Integration With Uber For Business,0.0696,1.5454,24.0861,0.4225,0.1837,[]
16
+ 2025-12-02 16:07:23,AMTM,"Cantor Fitzgerald Maintains Overweight on Amentum Holdings, Raises Price Target to $40",0.0703,0.6492,20.7363,0.4494,0.0900,[]
17
+ 2025-12-02 16:07:46,ANNX,"Clear Street Initiates Coverage On Annexon with Buy Rating, Announces Price Target of $17",0.0608,0.6555,19.4098,0.4228,0.0930,[]
18
+ 2025-12-02 16:08:35,WMT,"Walmart Highlights Opening Of 2nd Owned, Operated Milk Processing Facility In Georgia",0.0728,0.9513,22.1578,24.2291,0.0815,"['23.7437', '18.1360']"
19
+ 2025-12-02 16:08:59,GEHC,GE HealthCare Unveils Advanced Image Technologies In Partnership With Nvidia,0.0691,1.4951,19.3745,0.4110,0.2545,[]
20
+ 2025-12-02 16:09:53,AMZN,"Atlassian Announces Public Listing Of Jira, Confluence, Jira Service Management On Amazon Web Services Marketplace",0.0593,1.0465,18.7451,30.7071,0.1686,"['25.3135', '30.2298', '20.0358']"
21
+ 2025-12-02 16:10:16,CART,Instacart And The Home Depot Canada Launch Nationwide Partnership To Offer Same-Day Delivery Ahead Of The Holiday Season,0.0729,1.5587,19.9771,0.4319,0.1495,[]
22
+ 2025-12-02 16:11:11,JANX,"Clear Street Maintains Buy on Janux Therapeutics, Lowers Price Target to $32",0.0754,0.7339,22.9644,30.7776,0.1054,['30.2398']
23
+ 2025-12-02 16:12:40,BA,"Boeing shares are trading higher after the company reportedly said that it expects a year-over-year increase in its FY26 deliveries of its 737, 787 models and that the DoJ penalty will flip into 2026. The company will complete its Spirit acquisition this year.",0.1395,0.9766,63.2482,20.9684,0.0784,['20.4570']
24
+ 2025-12-02 16:13:22,SHOP,Shopify shares are trading higher after the company announced its merchants achieved $14.6 billion in sales over the Black Friday-Cyber Monday weekend. This represented a 27% year-over-year increase.,0.1278,0.7203,19.9788,0.4339,0.0795,[]
25
+ 2025-12-02 16:13:43,VOO,Americans Feel Bleak Despite Strong Economy — Here&#39;s Why,0.0610,0.6459,18.8759,0.4239,0.0866,[]
26
+ 2025-12-02 16:18:39,MDB,"Wells Fargo Maintains Overweight on MongoDB, Raises Price Target to $450",0.1581,1.5171,17.3543,16.9989,0.0814,['0.0']
27
+ 2025-12-02 16:18:59,DRVN,FEP To Acquire IMO Cars Wash From Driven Brands; Terms Not Disclosed,0.0471,0.6079,16.6443,0.7564,0.0646,[]
28
+ 2025-12-02 16:20:28,ASPI,"ASP Isotopes Founder Paul Mann To Return As CEO On January 19, 2026, After Health-Related Leave; Interim CEO Robert Ainscow To Resume COO-Only Role",0.0739,0.6738,19.5016,64.9769,0.0901,['0.0']
29
+ 2025-12-02 16:22:48,EIX,"UBS Maintains Buy on Edison Intl, Raises Price Target to $70",0.0610,1.1330,21.5212,22.7984,0.1066,['0.0']
30
+ 2025-12-02 16:24:43,JAMF,Jamf Partners With UberEther To Pursue FedRAMP High And DoD IL5 Authorization,0.1184,0.6767,28.2747,84.8554,0.1146,['0.0']
31
+ 2025-12-02 16:25:41,TXG,"Morgan Stanley Downgrades 10x Genomics to Equal-Weight, Raises Price Target to $20",0.0753,1.1334,23.7296,29.5197,0.0881,['0.0']
32
+ 2025-12-02 16:26:40,MDB,"UBS Maintains Neutral on MongoDB, Raises Price Target to $440",0.0642,0.6210,19.2360,28.2389,0.1334,['0.0']
33
+ 2025-12-02 16:38:19,MDB,"Canaccord Genuity Maintains Buy on MongoDB, Raises Price Target to $455",0.0653,1.3490,41.2146,19.6604,0.1831,['0.0']
34
+ 2025-12-02 16:40:40,ALB,Applied Materials To Rally Around 12%? Here Are 10 Top Analyst Forecasts For Tuesday,0.3986,6.8471,17.3188,115.0094,1.0037,['0.0']
35
+ 2025-12-02 16:51:44,AAPL,"Loop Capital Maintains Buy on Apple, Raises Price Target to $325",0.0763,1.4723,39.4810,20.8113,0.3246,['0.0']
36
+ 2025-12-02 16:54:34,ABTC,Nasdaq Surges 200 Points; United Natural Foods Posts Upbeat Earnings,0.0543,2.5619,19.3508,120.4805,0.8688,['0.0']
37
+ 2025-12-02 16:57:45,MSFT,"Speaking At UBS Global Technology And AI Conference, Nvidia CFO Says &#34;Still Haven&#39;t Completed A Definitive Agreement&#34; With OpenAI; Says $500B In Bookings For Blackwell And Rubin AI Chips Through 2026 Does Not Include &#34;Any Of The Work That We&#39;re Doing Right Now, On The Next Part Of The Agreement With OpenAI&#34;",0.1744,1.6818,57.4261,19.6906,0.3012,['0.0']
src/news_scraper/adapters/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # FILE: /stock-news-scraper/stock-news-scraper/src/adapters/__init__.py
2
+ # This file is intentionally left blank.
src/news_scraper/adapters/alpaca_ws.py ADDED
@@ -0,0 +1,195 @@
1
+ import os
2
+ import threading
3
+ import websocket
4
+ import json
5
+ import time
6
+ import logging
7
+ from news_scraper.models.news import News
8
+ from news_scraper.helpers.news_db_logger import NewsDBLogger
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ class AlpacaNewsFeedAdapter:
13
+ """
14
+ Singleton class for Alpaca WebSocket API adapter.
15
+ This class connects to the Alpaca WebSocket API to receive real-time news updates.
16
+ """
17
+ _instance = None
18
+ _lock = threading.Lock()
19
+
20
+ def __new__(cls):
21
+ if cls._instance is None:
22
+ with cls._lock:
23
+ if cls._instance is None:
24
+ cls._instance = super(AlpacaNewsFeedAdapter, cls).__new__(cls)
25
+ cls._instance.__initialized = False
26
+ return cls._instance
27
+
28
+ def __init__(self):
29
+ if self.__initialized:
30
+ return
31
+ self.__initialized = True
32
+ # Initialize the Alpaca WebSocket API connection
33
+ apca_key = "PKFF63XPNPCWWZXYXRK223NE2P"
34
+ apca_secret = "HqJE1b2jHuTvZzGGHW2jRHBajcmBw18PNJzUcNXLhfZ5"
35
+ self.websocket_url = "wss://stream.data.alpaca.markets/v1beta1/news"
36
+ self.headers = {
37
+ "APCA-API-KEY-ID": apca_key,
38
+ "APCA-API-SECRET-KEY": apca_secret,
39
+ }
40
+ self.ws = None
41
+ self.ws_thread = None
42
+ self.authenticated = False
43
+ self.connected = False
44
+ self.callbacks = []
45
+ self.db_logger = NewsDBLogger()
46
+ self.connect()
47
+
48
+ def connect(self):
49
+ """
50
+ Connect to the Alpaca WebSocket API.
51
+ """
52
+ self.ws = websocket.WebSocketApp(
53
+ self.websocket_url,
54
+ on_open=self.on_open,
55
+ on_message=self.on_message,
56
+ on_error=self.on_error,
57
+ on_close=self.on_close,
58
+ header=self.headers
59
+ )
60
+ # Run the WebSocket in a separate thread
61
+ self.ws_thread = threading.Thread(target=self.ws.run_forever)
62
+ self.ws_thread.daemon = True
63
+ self.ws_thread.start()
64
+
65
+ def on_open(self, ws):
66
+ """
67
+ Handle WebSocket open event.
68
+ """
69
+ print("WebSocket connection established")
70
+
71
+ def subscribe_to_news(self):
72
+ """
73
+ Subscribe to all news updates.
74
+ """
75
+ subscription_msg = {
76
+ "action": "subscribe",
77
+ "news": ["*"]
78
+ }
79
+ self.ws.send(json.dumps(subscription_msg))
80
+ #print("Sent subscription request for news")
81
+
82
+ def on_message(self, ws, message):
83
+ """
84
+ Handle incoming messages from the WebSocket.
85
+ """
86
+ print(f"[DEBUG] Raw message: {message}")
87
+ data = json.loads(message)
88
+
89
+ # Handle different message types
90
+ if isinstance(data, list) and len(data) > 0:
91
+ msg_type = data[0].get("T", "")
92
+
93
+ # Handle connection success
94
+ if msg_type == "success" and data[0].get("msg") == "connected":
95
+ self.connected = True
96
+ #print("Successfully connected to Alpaca News WebSocket")
97
+
98
+ # Handle authentication success
99
+ elif msg_type == "success" and data[0].get("msg") == "authenticated":
100
+ self.authenticated = True
101
+ #print("Successfully authenticated with Alpaca News WebSocket")
102
+ # Subscribe to news after authentication
103
+ self.subscribe_to_news()
104
+
105
+ # Handle subscription confirmation
106
+ elif msg_type == "subscription":
107
+ print("[ALPACA] Successfully subscribed to news channels:", data[0].get("news", []))
108
+
109
+ # Handle actual news updates
110
+ elif data[0].get("T") == "n":
111
+ # Process news data using the News class
112
+ news_item = News(
113
+ id=data[0].get("id"),
114
+ headline=data[0].get("headline"),
115
+ summary=data[0].get("summary"),
116
+ author=data[0].get("author"),
117
+ created_at=data[0].get("created_at"),
118
+ updated_at=data[0].get("updated_at"),
119
+ url=data[0].get("url"),
120
+ content=data[0].get("content"),
121
+ symbols=data[0].get("symbols", []),
122
+ source=data[0].get("source")
123
+ )
124
+ symbols_str = ', '.join(news_item.symbols) if news_item.symbols else ''
125
+ headline_preview = news_item.headline[:50] if news_item.headline else "No headline"
126
+ logger.info(f"[ALPACA] Received news | '{news_item.headline}' ({symbols_str})")
127
+
128
+ # Log to database (without sentiment for now) - REMOVED to avoid duplicate logging
129
+ # self.db_logger.log_news_with_sentiment(news_item)
130
+ # logger.info(f"[ALPACA] Logged to DB | {headline_preview}...")
131
+
132
+ # Call registered callbacks only when we have a news item
133
+ for callback in self.callbacks:
134
+ callback(news_item)
135
+
136
+ def on_error(self, ws, error):
137
+ """
138
+ Handle errors from the WebSocket.
139
+ """
140
+ print(f"Error: {error}")
141
+
142
+ def on_close(self, ws, close_status_code, close_msg):
143
+ """
144
+ Handle WebSocket closure.
145
+ """
146
+ print(f"WebSocket closed: {close_status_code} - {close_msg}")
147
+ self.connected = False
148
+ self.authenticated = False
149
+
150
+ # Attempt to reconnect after a delay
151
+ print("Attempting to reconnect in 5 seconds...")
152
+ time.sleep(5)
153
+ self.connect()
154
+
155
+ def register_callback(self, callback):
156
+ """
157
+ Register a callback function to be called when news is received.
158
+ The callback function should accept a news item dictionary as its parameter.
159
+ """
160
+ if callable(callback):
161
+ self.callbacks.append(callback)
162
+ return True
163
+ return False
164
+
165
+ def close(self):
166
+ """
167
+ Close the WebSocket connection.
168
+ """
169
+ if self.ws:
170
+ self.ws.close()
171
+ self.ws = None
172
+
173
+ if __name__ == "__main__":
174
+ # Example usage with a callback function
175
+ def print_news(news_item):
176
+ print(f"[PROCESSOR] [QUEUE] News item | {news_item.headline}")
177
+
178
+ # Create the adapter
179
+ alpaca_adapter = AlpacaNewsFeedAdapter()
180
+ alpaca_adapter.register_callback(print_news)
181
+
182
+ # Keep the main thread alive to receive messages
183
+ try:
184
+ while True:
185
+ time.sleep(1)
186
+ except KeyboardInterrupt:
187
+ print("Exiting...")
188
+ alpaca_adapter.close()
189
+
190
+
191
+
192
+
193
+
194
+
195
+
src/news_scraper/adapters/base_adapter.py ADDED
@@ -0,0 +1,10 @@
1
+ class BaseAdapter:
2
+
3
+ def fetch_articles(self):
4
+ raise NotImplementedError("Subclasses must implement this method.")
5
+
6
+ def parse_article(self, raw_data):
7
+ raise NotImplementedError("Subclasses must implement this method.")
8
+
9
+ def analyze_article(self, article):
10
+ raise NotImplementedError("Subclasses must implement this method.")
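
A minimal concrete adapter sketch against the interface above. The feed data is illustrative; Article is the model defined later in src/news_scraper/models/article.py.

    from news_scraper.adapters.base_adapter import BaseAdapter
    from news_scraper.models.article import Article

    class StaticAdapter(BaseAdapter):
        """Toy adapter that serves a fixed list instead of scraping a site."""
        def fetch_articles(self):
            return [{'link': 'https://example.com/a', 'title': 'Example headline',
                     'tickers': ['AAPL'], 'ts': '2025-01-15'}]

        def parse_article(self, raw_data):
            return Article(url=raw_data['link'], title=raw_data['title'], description=None,
                           score=None, ticker=raw_data['tickers'], time=raw_data['ts'])

    adapter = StaticAdapter()
    articles = [adapter.parse_article(r) for r in adapter.fetch_articles()]
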
src/news_scraper/adapters/bloomberg_adapter.py ADDED
@@ -0,0 +1,19 @@
1
+ from news_scraper.adapters.base_adapter import BaseAdapter
2
+ from news_scraper.models.article import Article
3
+
4
+ class BloombergAdapter(BaseAdapter):
5
+
6
+ def __init__(self):
7
+ super().__init__()
8
+
9
+ def fetch_articles(self):
10
+ # Implementation for fetching articles from Bloomberg
11
+ pass
12
+
13
+ def parse_article(self, raw_article):
14
+ # Implementation for parsing a raw article from Bloomberg
15
+ pass
16
+
17
+ def save_to_database(self, article: Article):
18
+ # Implementation for saving the article to the database
19
+ pass
src/news_scraper/adapters/motley_fool_adapter.py ADDED
@@ -0,0 +1,19 @@
1
+ from .base_adapter import BaseAdapter
2
+ from news_scraper.models.article import Article
3
+
4
+ class MotleyFoolAdapter(BaseAdapter):
5
+
6
+ def __init__(self):
7
+ super().__init__()
8
+
9
+ def fetch_articles(self):
10
+ # Implementation for fetching articles from Motley Fool
11
+ pass
12
+
13
+ def parse_article(self, raw_article):
14
+ # Implementation for parsing a raw article from Motley Fool
15
+ pass
16
+
17
+ def save_to_database(self, article: Article):
18
+ # Implementation for saving the article to the database
19
+ pass
src/news_scraper/adapters/yahoo_finance_adapter.py ADDED
@@ -0,0 +1,44 @@
1
+ from datetime import datetime
2
+ from .base_adapter import BaseAdapter
3
+ from news_scraper.models.article import Article
4
+ import yfinance as yf
5
+ from news_scraper.services.sentiment_analysis_gemini import analyze_sentiment
6
+ from news_scraper.helpers.news_db_logger import NewsDBLogger
7
+
8
+ class YahooFinanceAdapter(BaseAdapter):
9
+ def __init__(self):
10
+ super().__init__()
11
+ self.db_logger = NewsDBLogger()
12
+
13
+ def fetch_articles(self):
14
+ # Logic to scrape articles from Yahoo Finance
15
+ return yf.Search('AAPL', news_count=1).news
16
+
17
+ def parse_article(self, raw_article):
18
+ # Logic to parse a raw article into an Article object
19
+ publish_time = datetime.fromtimestamp(raw_article['providerPublishTime'])
20
+ article = Article(url=raw_article['link'], title=raw_article['title'], description=None, score=None, ticker=raw_article['relatedTickers'], time=publish_time)
21
+ print(repr(article))
22
+ return article
23
+
24
+ def analyze_article(self, article):
25
+ # Use Gemini sentiment analysis on the article
26
+ sentiment_result = analyze_sentiment(url=article.url)
27
+
28
+ # Log the article and its sentiment to database
29
+ self.db_logger.log_news_with_sentiment(article, sentiment_result)
30
+
31
+ # Continue with the original analysis
32
+ return super().analyze_article(article)
33
+
34
+ """
35
+ Format of the raw article object returned by Yahoo Finance API:
36
+ [{'uuid': '637daa50-f230-3ddd-a251-1b9840c38c10',
37
+ 'title': 'Apple Inc. (AAPL): Alibaba Partnership to Bring AI to iPhones in China',
38
+ 'publisher': 'Insider Monkey',
39
+ 'link': 'https://finance.yahoo.com/news/apple-inc-aapl-alibaba-partnership-044613466.html',
40
+ 'providerPublishTime': 1739421973,
41
+ 'type': 'STORY',
42
+ 'thumbnail': {'resolutions': [{'url': 'https://s.yimg.com/uu/api/res/1.2/fTPNobtT0bdcSxj2xXAamw--~B/aD04MTY7dz0xNDU2O2FwcGlkPXl0YWNoeW9u/https://media.zenfs.com/en/insidermonkey.com/bd47dc2e86b4a3086f845d200d9daf1f', 'width': 1456, 'height': 816, 'tag': 'original'}, {'url': 'https://s.yimg.com/uu/api/res/1.2/nIyTyNRglKMNfFFMGli.7A--~B/Zmk9ZmlsbDtoPTE0MDtweW9mZj0wO3c9MTQwO2FwcGlkPXl0YWNoeW9u/https://media.zenfs.com/en/insidermonkey.com/bd47dc2e86b4a3086f845d200d9daf1f', 'width': 140, 'height': 140, 'tag': '140x140'}]},
43
+ 'relatedTickers': ['AAPL']}]
44
+ """
src/news_scraper/helpers/news_db_logger.py ADDED
@@ -0,0 +1,248 @@
1
+ """
2
+ Database Logger for News Items
3
+ Replaces ExcelLogger - logs news with sentiment directly to MySQL news table
4
+ """
5
+
6
+ import sys
7
+ import logging
8
+ import threading
9
+ from pathlib import Path
10
+ from datetime import datetime
11
+ from typing import Optional, List, Tuple
12
+
13
+ # Add src to path for imports
14
+ sys.path.append(str(Path(__file__).parent.parent.parent))
15
+
16
+ from db.local_database import LocalDatabase, DatabaseEntry, DataType
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class NewsDBLogger:
22
+ """
23
+ Logs news items with sentiment analysis directly to the MySQL news table.
24
+ Replaces the old ExcelLogger functionality.
25
+ """
26
+ _instance = None
27
+ _lock = threading.Lock()
28
+
29
+ def __new__(cls):
30
+ if cls._instance is None:
31
+ with cls._lock:
32
+ if cls._instance is None:
33
+ cls._instance = super(NewsDBLogger, cls).__new__(cls)
34
+ cls._instance.initialized = False
35
+ return cls._instance
36
+
37
+ def __init__(self):
38
+ """Initialize the database logger."""
39
+ if self.initialized:
40
+ return
41
+ self.initialized = True
42
+ self.db = LocalDatabase()
43
+ logger.info("✅ NewsDBLogger initialized with MySQL backend")
44
+
45
+ def log_news_with_sentiment(self, news_item, pre_sentiment=None, sentiment=None, rating=None, processing_time=None):
46
+ """
47
+ Log a news item and its sentiment analysis to the database.
48
+
49
+ Args:
50
+ news_item: The news item object containing news details
51
+ pre_sentiment (str, optional): Pre-processed sentiment analysis text
52
+ sentiment (str, optional): Processed sentiment analysis text
53
+ rating (str or float, optional): Sentiment score/rating
54
+ processing_time (float, optional): Time taken to process this news item in seconds
55
+ """
56
+ try:
57
+ # Extract symbols/ticker
58
+ ticker = "GENERAL" # Default ticker
59
+ symbols_str = ""
60
+
61
+ if hasattr(news_item, 'symbols') and news_item.symbols:
62
+ symbols_list = news_item.symbols if isinstance(news_item.symbols, list) else [news_item.symbols]
63
+ symbols_str = ', '.join(symbols_list)
64
+ ticker = symbols_list[0] if symbols_list else "GENERAL"
65
+ elif isinstance(news_item, dict) and 'symbols' in news_item:
66
+ symbols_list = news_item['symbols'] if isinstance(news_item['symbols'], list) else [news_item['symbols']]
67
+ symbols_str = ', '.join(symbols_list)
68
+ ticker = symbols_list[0] if symbols_list else "GENERAL"
69
+
70
+ # Get date
71
+ news_date = datetime.now().strftime("%Y-%m-%d")
72
+ if hasattr(news_item, 'created_at'):
73
+ try:
74
+ news_date = str(news_item.created_at).split('T')[0]
75
+ except Exception:
76
+ pass
77
+ elif isinstance(news_item, dict) and 'created_at' in news_item:
78
+ try:
79
+ news_date = str(news_item['created_at']).split('T')[0]
80
+ except Exception:
81
+ pass
82
+
83
+ # Build the data payload matching Excel format
84
+ data = {
85
+ 'Timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
86
+ 'NewsID': getattr(news_item, 'id', None) if hasattr(news_item, 'id') else (news_item.get('id') if isinstance(news_item, dict) else None),
87
+ 'Headline': getattr(news_item, 'headline', None) if hasattr(news_item, 'headline') else (news_item.get('headline') if isinstance(news_item, dict) else None),
88
+ 'URL': getattr(news_item, 'url', None) if hasattr(news_item, 'url') else (news_item.get('url') if isinstance(news_item, dict) else None),
89
+ 'Source': getattr(news_item, 'source', None) if hasattr(news_item, 'source') else (news_item.get('source') if isinstance(news_item, dict) else None),
90
+ 'Symbols': symbols_str,
91
+ 'PreSentimentScore': pre_sentiment,
92
+ 'SentimentScore': rating,
93
+ 'SentimentAnalysis': sentiment,
94
+ 'TimeToProcess': processing_time
95
+ }
96
+
97
+ # Create database entry
98
+ entry = DatabaseEntry(
99
+ date=news_date,
100
+ data_type=DataType.NEWS.value, # "news"
101
+ ticker=ticker,
102
+ data=data,
103
+ metadata={
104
+ 'logged_at': datetime.now().isoformat(),
105
+ 'has_sentiment': sentiment is not None,
106
+ 'processing_time': processing_time
107
+ }
108
+ )
109
+
110
+ # Save to database
111
+ success = self.db.save(entry, expiry_days=90) # Keep news for 90 days
112
+
113
+ if success:
114
+ headline = data.get('Headline', 'Unknown headline')
115
+ logger.info(f"✅ Logged to DB | {headline[:60]}...")
116
+ else:
117
+ logger.error(f"❌ Failed to log news to database")
118
+
119
+ return success
120
+
121
+ except Exception as e:
122
+ logger.error(f"❌ Error logging news: {str(e)}")
123
+ import traceback
124
+ traceback.print_exc()
125
+ return False
126
+
127
+ def log_batch(self, news_items_with_sentiment_and_times: List[Tuple]):
128
+ """
129
+ Log multiple news items with sentiment in batch.
130
+
131
+ Args:
132
+ news_items_with_sentiment_and_times: List of tuples (news_item, sentiment_data, processing_time)
133
+ processing_time can be None if unavailable
134
+ """
135
+ try:
136
+ entries = []
137
+
138
+ for item_data in news_items_with_sentiment_and_times:
139
+ # Unpack the tuple - handle both 2-element and 3-element tuples
140
+ if len(item_data) == 2:
141
+ news_item, sentiment_data = item_data
142
+ processing_time = None
143
+ elif len(item_data) == 3:
144
+ news_item, sentiment_data, processing_time = item_data
145
+ else:
146
+ print(f"⚠️ Invalid item data format: {item_data}")
147
+ continue
148
+
149
+ # Extract sentiment details
150
+ pre_sentiment = sentiment_data.get('pre_sentiment') if isinstance(sentiment_data, dict) else None
151
+ sentiment = sentiment_data.get('sentiment') if isinstance(sentiment_data, dict) else None
152
+ rating = sentiment_data.get('rating') if isinstance(sentiment_data, dict) else None
153
+
154
+ # Extract symbols/ticker
155
+ ticker = "GENERAL"
156
+ symbols_str = ""
157
+
158
+ if hasattr(news_item, 'symbols') and news_item.symbols:
159
+ symbols_list = news_item.symbols if isinstance(news_item.symbols, list) else [news_item.symbols]
160
+ symbols_str = ', '.join(symbols_list)
161
+ ticker = symbols_list[0] if symbols_list else "GENERAL"
162
+ elif isinstance(news_item, dict) and 'symbols' in news_item:
163
+ symbols_list = news_item['symbols'] if isinstance(news_item['symbols'], list) else [news_item['symbols']]
164
+ symbols_str = ', '.join(symbols_list)
165
+ ticker = symbols_list[0] if symbols_list else "GENERAL"
166
+
167
+ # Get date
168
+ news_date = datetime.now().strftime("%Y-%m-%d")
169
+ if hasattr(news_item, 'created_at'):
170
+ try:
171
+ news_date = str(news_item.created_at).split('T')[0]
172
+ except Exception:
173
+ pass
174
+ elif isinstance(news_item, dict) and 'created_at' in news_item:
175
+ try:
176
+ news_date = str(news_item['created_at']).split('T')[0]
177
+ except Exception:
178
+ pass
179
+
180
+ # Build data payload
181
+ data = {
182
+ 'Timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
183
+ 'NewsID': getattr(news_item, 'id', None) if hasattr(news_item, 'id') else (news_item.get('id') if isinstance(news_item, dict) else None),
184
+ 'Headline': getattr(news_item, 'headline', None) if hasattr(news_item, 'headline') else (news_item.get('headline') if isinstance(news_item, dict) else None),
185
+ 'URL': getattr(news_item, 'url', None) if hasattr(news_item, 'url') else (news_item.get('url') if isinstance(news_item, dict) else None),
186
+ 'Source': getattr(news_item, 'source', None) if hasattr(news_item, 'source') else (news_item.get('source') if isinstance(news_item, dict) else None),
187
+ 'Symbols': symbols_str,
188
+ 'PreSentimentScore': pre_sentiment,
189
+ 'SentimentScore': rating,
190
+ 'SentimentAnalysis': sentiment,
191
+ 'TimeToProcess': processing_time
192
+ }
193
+
194
+ # Create database entry
195
+ entry = DatabaseEntry(
196
+ date=news_date,
197
+ data_type=DataType.NEWS.value,
198
+ ticker=ticker,
199
+ data=data,
200
+ metadata={
201
+ 'logged_at': datetime.now().isoformat(),
202
+ 'has_sentiment': sentiment is not None,
203
+ 'processing_time': processing_time
204
+ }
205
+ )
206
+
207
+ entries.append(entry)
208
+
209
+ # Batch save to database
210
+ if entries:
211
+ saved_count = self.db.save_batch(entries, expiry_days=90)
212
+ logger.info(f"✅ Batch logged {saved_count}/{len(entries)} news items")
213
+ return saved_count
214
+ else:
215
+ logger.warning("⚠️ No valid entries to log")
216
+ return 0
217
+
218
+ except Exception as e:
219
+ logger.error(f"❌ Error batch logging: {str(e)}")
220
+ import traceback
221
+ traceback.print_exc()
222
+ return 0
223
+
224
+
225
+ # Example usage
226
+ if __name__ == "__main__":
227
+ logger = NewsDBLogger()
228
+
229
+ # Test with a mock news item
230
+ class MockNews:
231
+ def __init__(self):
232
+ self.id = "test123"
233
+ self.headline = "Test Headline"
234
+ self.url = "https://example.com"
235
+ self.source = "TestSource"
236
+ self.symbols = ["AAPL", "MSFT"]
237
+ self.created_at = "2025-01-15T10:30:00Z"
238
+
239
+ mock_news = MockNews()
240
+ logger.log_news_with_sentiment(
241
+ mock_news,
242
+ pre_sentiment="POSITIVE",
243
+ sentiment="The news is very positive",
244
+ rating=0.85,
245
+ processing_time=1.5
246
+ )
247
+
248
+ print("\n✅ Test completed - check database for entry")
src/news_scraper/helpers/performance_logger.py ADDED
@@ -0,0 +1,63 @@
1
+ import csv
2
+ import os
3
+ import logging
4
+ from datetime import datetime
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ class PerformanceLogger:
9
+ def __init__(self, filename="news_processing_performance.csv"):
10
+ self.filename = filename
11
+ self._initialize_csv()
12
+
13
+ def _initialize_csv(self):
14
+ """Initialize the CSV file with headers if it doesn't exist."""
15
+ if not os.path.exists(self.filename):
16
+ try:
17
+ with open(self.filename, mode='w', newline='', encoding='utf-8') as file:
18
+ writer = csv.writer(file)
19
+ headers = [
20
+ "timestamp",
21
+ "ticker",
22
+ "headline",
23
+ "handler1_duration_sec",
24
+ "handler2_duration_sec",
25
+ "handler3_duration_sec",
26
+ "handler4_duration_sec",
27
+ "handler5_duration_sec",
28
+ "marketaux_processing_times_sec"
29
+ ]
30
+ writer.writerow(headers)
31
+ logger.info(f"[PERFORMANCE] Initialized log file: {self.filename}")
32
+ except Exception as e:
33
+ logger.error(f"[PERFORMANCE] Failed to initialize log file: {e}")
34
+
35
+ def log_metrics(self, metrics: dict):
36
+ """
37
+ Log a dictionary of metrics to the CSV file.
38
+
39
+ Args:
40
+ metrics (dict): Dictionary containing metric values.
41
+ """
42
+ try:
43
+ with open(self.filename, mode='a', newline='', encoding='utf-8') as file:
44
+ writer = csv.writer(file)
45
+
46
+ # Extract values with defaults
47
+ row = [
48
+ datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
49
+ metrics.get('ticker', 'N/A'),
50
+ metrics.get('headline', 'N/A'),
51
+ f"{metrics.get('handler1_duration', 0):.4f}",
52
+ f"{metrics.get('handler2_duration', 0):.4f}",
53
+ f"{metrics.get('handler3_duration', 0):.4f}",
54
+ f"{metrics.get('handler4_duration', 0):.4f}",
55
+ f"{metrics.get('handler5_duration', 0):.4f}",
56
+ str(metrics.get('marketaux_processing_times', []))
57
+ ]
58
+
59
+ writer.writerow(row)
60
+ logger.info(f"[PERFORMANCE] Logged metrics for {metrics.get('ticker', 'N/A')}")
61
+
62
+ except Exception as e:
63
+ logger.error(f"[PERFORMANCE] Failed to log metrics: {e}")
src/news_scraper/helpers/timer.py ADDED
@@ -0,0 +1,138 @@
1
+ import time
2
+ import logging
3
+ from datetime import datetime
4
+ import statistics
5
+ import os
6
+
7
+ class Timer:
8
+ """
9
+ Helper class to track and log the time it takes to process news items.
10
+ Measures time from when an item is added to the queue until it's processed.
11
+ """
12
+ def __init__(self, log_file_path=None):
13
+ self.start_times = {} # Map of news_id -> start timestamp
14
+ self.processing_times = [] # Store recent processing times for stats
15
+ self.max_history = 100 # Maximum number of processing times to store
16
+ self.log_file_path = log_file_path or "news_processing_times.log"
17
+ self.logger = self._setup_logger()
18
+ self.log_to_file = False # Flag to control logging to file
19
+
20
+ def _setup_logger(self):
21
+ """Set up a dedicated logger for timing statistics"""
22
+ logger = logging.getLogger('news_processing_timer')
23
+ if not logger.handlers:
24
+ logger.setLevel(logging.INFO)
25
+
26
+ # Create log directory if it doesn't exist
27
+ log_dir = os.path.dirname(self.log_file_path)
28
+ if log_dir and not os.path.exists(log_dir):
29
+ os.makedirs(log_dir)
30
+
31
+ # if self.log_to_file:
32
+ # # Set up file handler for logging to a file
33
+ # file_handler = logging.FileHandler(self.log_file_path)
34
+ # file_handler.setFormatter(logging.Formatter(
35
+ # '%(asctime)s - %(levelname)s - %(message)s'
36
+ # ))
37
+ # logger.addHandler(file_handler)
38
+
39
+ # Console handler for immediate visibility
40
+ console_handler = logging.StreamHandler()
41
+ console_handler.setFormatter(logging.Formatter(
42
+ '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
43
+ ))
44
+ logger.addHandler(console_handler)
45
+
46
+ return logger
47
+
48
+ def start_timing(self, news_item):
49
+ """Start timing for a news item when it enters the queue"""
50
+ # Get a unique ID for the news item - prefer id attribute if available
51
+ news_id = getattr(news_item, 'id', str(id(news_item)))
52
+
53
+ # Store the start time
54
+ self.start_times[news_id] = time.time()
55
+
56
+ # Log the event at debug level
57
+ item_title = getattr(news_item, 'headline',
58
+ getattr(news_item, 'title', f"ID: {news_id}"))
59
+ self.logger.debug(f"Started timing for news item: {item_title}")
60
+
61
+ return news_id
62
+
63
+ def stop_timing(self, news_item):
64
+ """Stop timing for a news item and log the processing time"""
65
+ end_time = time.time()
66
+
67
+ # Get a unique ID for the news item
68
+ news_id = getattr(news_item, 'id', str(id(news_item)))
69
+
70
+ if news_id in self.start_times:
71
+ start_time = self.start_times[news_id]
72
+ process_time = end_time - start_time
73
+
74
+ # Get title for better logging
75
+ item_title = getattr(news_item, 'headline',
76
+ getattr(news_item, 'title', f"ID: {news_id}"))
77
+
78
+ # Get symbols/ticker if available
79
+ symbols = getattr(news_item, 'symbols',
80
+ getattr(news_item, 'ticker', ''))
81
+ symbols_str = ', '.join(symbols) if isinstance(symbols, list) else str(symbols)
82
+
83
+ # Log the processing time
84
+ self.logger.info(f"Processing time: {process_time:.4f}s - {item_title} - Symbols: {symbols_str}")
85
+
86
+ # Store for statistics
87
+ self.processing_times.append(process_time)
88
+ if len(self.processing_times) > self.max_history:
89
+ self.processing_times.pop(0) # Remove oldest
90
+
91
+ # Remove the entry from the start times dictionary
92
+ del self.start_times[news_id]
93
+
94
+ return process_time
95
+ else:
96
+ self.logger.warning(f"No start time found for news item: {item_title}")
97
+ return None
98
+
99
+ def get_queue_stats(self):
100
+ """Get statistics about items currently in the queue"""
101
+ current_time = time.time()
102
+ waiting_times = []
103
+
104
+ for news_id, start_time in self.start_times.items():
105
+ waiting_time = current_time - start_time
106
+ waiting_times.append(waiting_time)
107
+
108
+ stats = {
109
+ 'queue_size': len(waiting_times),
110
+ 'avg_wait_time': sum(waiting_times) / len(waiting_times) if waiting_times else 0,
111
+ 'max_wait_time': max(waiting_times) if waiting_times else 0,
112
+ 'min_wait_time': min(waiting_times) if waiting_times else 0
113
+ }
114
+
115
+ self.logger.info(f"Queue stats: {stats['queue_size']} items, avg wait: {stats['avg_wait_time']:.2f}s, max wait: {stats['max_wait_time']:.2f}s")
116
+ return stats
117
+
118
+ def get_processing_stats(self):
119
+ """Get statistics about recent processing times"""
120
+ if not self.processing_times:
121
+ return {
122
+ 'count': 0,
123
+ 'avg': 0,
124
+ 'max': 0,
125
+ 'min': 0,
126
+ 'median': 0
127
+ }
128
+
129
+ stats = {
130
+ 'count': len(self.processing_times),
131
+ 'avg': sum(self.processing_times) / len(self.processing_times),
132
+ 'max': max(self.processing_times),
133
+ 'min': min(self.processing_times),
134
+ 'median': statistics.median(self.processing_times) if len(self.processing_times) > 0 else 0
135
+ }
136
+
137
+ self.logger.info(f"Processing stats: {stats['count']} items, avg: {stats['avg']:.4f}s, median: {stats['median']:.4f}s, min: {stats['min']:.4f}s, max: {stats['max']:.4f}s")
138
+ return stats
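
A minimal sketch of the Timer lifecycle, using a stand-in news object; in the live pipeline start_timing fires when an item enters the queue and stop_timing when a handler finishes (see src/news_scraper/main.py).

    from types import SimpleNamespace
    from news_scraper.helpers.timer import Timer

    timer = Timer("logs/news_processing_times.log")
    item = SimpleNamespace(id="n-1", headline="Test headline", symbols=["AAPL"])

    timer.start_timing(item)
    # ... handlers process the item ...
    elapsed = timer.stop_timing(item)  # logs and returns the processing time
    timer.get_processing_stats()       # aggregate stats over recent items
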
src/news_scraper/interfaces/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # FILE: /stock-news-scraper/stock-news-scraper/src/interfaces/__init__.py
2
+ # This file is intentionally left blank.
src/news_scraper/main.py ADDED
@@ -0,0 +1,69 @@
1
+ import time
2
+ import logging
3
+ from news_scraper.services.news_processor import NewsProcessor
4
+ from news_scraper.adapters.alpaca_ws import AlpacaNewsFeedAdapter
5
+ from news_scraper.helpers.timer import Timer
6
+
7
+ def main():
8
+ # Configure logging
9
+ logging.basicConfig(
10
+ level=logging.INFO,
11
+ format='%(asctime)s - %(levelname)s - %(message)s',
12
+ datefmt='%Y-%m-%d %H:%M:%S'
13
+ )
14
+
15
+ # Initialize the timer
16
+ timer = Timer("logs/news_processing_times.log")
17
+
18
+ # Initialize the news processor
19
+ news_processor = NewsProcessor()
20
+
21
+ # Register an async callback for news processing
22
+ async def news_callback(news_item):
23
+ # Stop timing when the news item is processed
24
+ timer.stop_timing(news_item)
25
+ print(f"[PROCESSOR] [FUNC] Processing | {news_item.headline if hasattr(news_item, 'headline') else ''}")
26
+
27
+ news_processor.register_callback(news_callback)
28
+
29
+ # Start processing news items
30
+ news_processor.start_processing()
31
+
32
+ # Initialize AlpacaNewsFeedAdapter
33
+ def print_news(news_item):
34
+ # Start timing when the news item enters the queue
35
+ timer.start_timing(news_item)
36
+ print(f"[PROCESSOR] [QUEUE] News item | {news_item.headline}")
37
+ news_processor.add_news(news_item)
38
+
39
+ # Create the Alpaca adapter and register callback
40
+ alpaca_adapter = AlpacaNewsFeedAdapter()
41
+ alpaca_adapter.register_callback(print_news)
42
+
43
+ # Initialize CalendarProcessor
44
+ from news_scraper.services.calendar_processor import CalendarProcessor
45
+ calendar_processor = CalendarProcessor()
46
+ last_run_date = None
47
+
48
+ # Keep the main thread alive to receive messages
49
+ try:
50
+ while True:
51
+ # Run Calendar Processor daily
52
+ from datetime import date
53
+ current_date = date.today()
54
+ if last_run_date != current_date:
55
+ calendar_processor.run_daily_scan()
56
+ last_run_date = current_date
57
+
58
+ # Periodically log queue statistics
59
+ if time.time() % 120 < 1: # Roughly every two minutes
60
+ timer.get_queue_stats()
61
+ timer.get_processing_stats()
62
+ time.sleep(1)
63
+ except KeyboardInterrupt:
64
+ print("Exiting...")
65
+ alpaca_adapter.close()
66
+ news_processor.stop_processing()
67
+
68
+ if __name__ == "__main__":
69
+ main()
src/news_scraper/models/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ # FILE: /stock-news-scraper/stock-news-scraper/src/models/__init__.py
2
+
3
+ # This file is intentionally left blank.
src/news_scraper/models/article.py ADDED
@@ -0,0 +1,14 @@
1
+ class Article:
2
+ def __init__(self, url: str, title: str, description: str, score: float, ticker: str, time: str):
3
+ self.url = url
4
+ self.title = title
5
+ self.description = description
6
+ self.score = score
7
+ self.ticker = ticker
8
+ self.time = time
9
+
10
+ def __repr__(self):
11
+ return self._format()
12
+
13
+ def _format(self):
14
+ return f"Article(title={self.title}, url={self.url}, score={self.score}, ticker={self.ticker}, time={self.time})"