File size: 13,151 Bytes
b04d361
d202420
79de53b
 
 
d202420
 
 
 
 
 
 
 
79de53b
d202420
79de53b
d202420
79de53b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f0c07c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79de53b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d202420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79de53b
 
d202420
b39ade6
d202420
84b186c
d2ecd46
d202420
 
79de53b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b04d361
79de53b
 
 
 
 
 
 
 
 
 
 
b04d361
 
 
 
 
79de53b
8f0c07c
 
9070c36
8f0c07c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79de53b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d202420
 
 
 
 
 
 
 
 
b04d361
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
import plotly.graph_objects as go
import streamlit as st
import yfinance as yf
import pandas as pd
import numpy as np
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import plotly
import plotly.express as px
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler

# Base URL for FinViz quote pages; the ticker symbol is appended in get_news().
finviz_url = 'https://finviz.com/quote.ashx?t='

# Maps human-readable sector names (as they appear in TradingView CSV exports)
# to SPDR sector ETF tickers.
# NOTE(review): this module-level mapping is not referenced by any code visible
# in this file — the app builds a separate inline sector_mapping later; confirm
# it is unused before removing.
sector_etf_mapping = {
    'Consumer Durables': 'XLY',
    'Consumer Discretionary': 'XLY',
    'Consumer Staples': 'XLP',
    'Energy': 'XLE',
    'Financials': 'XLF',
    'Health Care': 'XLV',
    'Industrials': 'XLI',
    'Materials': 'XLB',
    'Real Estate': 'XLRE',
    'Technology': 'XLK',
    'Utilities': 'XLU',
    'Communication Services': 'XLC'
}


# Create two functions to calculate if a level is SUPPORT or a RESISTANCE level through fractal identification
def is_Suppport_Level(df, i):
    """Return True if bar *i* is a fractal support.

    A fractal support has a low below the lows of the bars on either side,
    with the second-out bars also descending toward it.

    Uses positional .iloc indexing: the original ``df['Low'][i]`` with an
    integer label on a DatetimeIndex relied on a deprecated positional
    fallback that was removed in pandas 3.0.
    """
    low = df['Low']
    return (low.iloc[i] < low.iloc[i - 1]
            and low.iloc[i] < low.iloc[i + 1]
            and low.iloc[i + 1] < low.iloc[i + 2]
            and low.iloc[i - 1] < low.iloc[i - 2])

def is_Resistance_Level(df, i):
    """Return True if bar *i* is a fractal resistance.

    Mirror of the support check: the high at *i* exceeds the highs of the
    bars on either side, with the second-out bars also falling away.

    Uses positional .iloc indexing: the original ``df['High'][i]`` with an
    integer label on a DatetimeIndex relied on a deprecated positional
    fallback that was removed in pandas 3.0.
    """
    high = df['High']
    return (high.iloc[i] > high.iloc[i - 1]
            and high.iloc[i] > high.iloc[i + 1]
            and high.iloc[i + 1] > high.iloc[i + 2]
            and high.iloc[i - 1] > high.iloc[i - 2])

# Given a candidate price level, decide whether it is far enough from every
# previously discovered key level to count as a new one.
def distance_from_mean(level, levels, mean):
    """Return True when *level* is at least *mean* away from each entry in *levels*."""
    for known in levels:
        if abs(level - known) < mean:
            return False
    return True


# Function to get the trend of a given stock or ETF
def get_trend(ticker, period='1mo'):
    """Download price history for *ticker* via yfinance and return the
    'Close' price series for the given *period* (default one month)."""
    history = yf.Ticker(ticker).history(period=period)
    return history['Close']

# Function to normalize a pandas series
def normalize(series):
    """Min-max scale *series* into the [0, 1] range.

    Returns a float pandas Series aligned to the original index. A constant
    series (a single unique value) is returned unchanged, since min-max
    scaling would divide by zero.

    The arithmetic is exactly what MinMaxScaler(feature_range=(0, 1))
    computes, without allocating a scaler object on every call.
    """
    if len(series.unique()) > 1:  # Check if the series has more than one unique value
        lo = series.min()
        hi = series.max()
        return (series - lo) / (hi - lo)
    else:
        return series  # If the series has only one unique value, return it as is


# Function to get the trend of a given sector or industry
def get_trend_from_csv(df, column, value, period='1mo'):
    """Filter *df* to rows where *column* equals *value* and return the
    resampled mean over *period*.

    NOTE(review): .resample() requires df to carry a DatetimeIndex, and
    '1mo' is not a valid pandas resample rule ('M'/'ME' would be) — confirm;
    this helper does not appear to be called anywhere in this file.
    """
    # Filter the dataframe by the given sector or industry
    df_filtered = df[df[column] == value]
    # Calculate the mean of the closing prices of all stocks in the sector or industry
    # NOTE(review): .mean() averages every numeric column, not just closes.
    trend = df_filtered.resample(period).mean()
    return trend

def get_news(ticker):
    """Fetch the FinViz quote page for *ticker* and return its news table.

    Returns the BeautifulSoup element with id 'news-table' (or None if the
    page layout changed and no such element exists).
    """
    url = finviz_url + ticker
    # FinViz rejects requests that lack a browser-like User-Agent header.
    req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}) 
    response = urlopen(req)    
    # Parse with an explicit parser: constructing BeautifulSoup without one
    # emits a GuessedAtParserWarning and silently uses whichever parser
    # happens to be installed, which can vary between environments.
    html = BeautifulSoup(response, features='html.parser')
    # Find 'news-table' in the Soup and load it into 'news_table'
    news_table = html.find(id='news-table')
    return news_table
	
# parse news into dataframe
def parse_news(news_table):
    """Parse FinViz news-table rows into a DataFrame.

    Returns columns ['date', 'time', 'headline'] plus a combined 'datetime'
    column. FinViz emits the date only on the first headline of each day;
    subsequent rows carry just a time, so the last seen date is carried
    forward to them.
    """
    parsed_news = []
    date = None  # last date seen; time-only rows inherit it

    for row in news_table.findAll('tr'):
        try:
            # Headline text lives in the row's anchor tag; the td holds
            # either "TIME" or "DATE TIME".
            text = row.a.get_text()
            date_scrape = row.td.text.split()

            if len(date_scrape) == 1:
                time = date_scrape[0]
            else:
                date = date_scrape[0]
                time = date_scrape[1]

            if date is None:
                # A leading time-only row has no date to attach; skip it
                # (the original skipped such rows via an unbound-name error
                # swallowed by a bare except).
                continue

            parsed_news.append([date, time, text])
        except AttributeError:
            # Malformed rows (missing <a> or <td>) are skipped, preserving
            # the original best-effort behavior — but without a bare except
            # that would also hide genuine bugs.
            continue

    # Set column names
    columns = ['date', 'time', 'headline']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    # Create a pandas datetime object from the strings in 'date' and 'time' column
    # NOTE(review): FinViz sometimes uses 'Today' as the date string, which
    # pd.to_datetime may not parse — confirm against live data.
    parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])

    return parsed_news_df
        
    
        
def score_news(parsed_news_df):
    """Score each headline with VADER and return a datetime-indexed DataFrame.

    Adds the VADER columns 'neg'/'neu'/'pos' plus 'sentiment_score' (the
    renamed 'compound' score); drops the raw 'date'/'time' string columns.
    """
    # Instantiate the sentiment intensity analyzer
    vader = SentimentIntensityAnalyzer()

    # Each polarity score is a dict: {'neg': .., 'neu': .., 'pos': .., 'compound': ..}
    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()

    # Convert the 'scores' list of dicts into a DataFrame
    scores_df = pd.DataFrame(scores)

    # Join the DataFrames of the news and the list of dicts
    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
    # The positional axis argument (drop(..., 1)) was removed in pandas 2.0;
    # use the explicit columns= keyword instead.
    parsed_and_scored_news = parsed_and_scored_news.drop(columns=['date', 'time'])
    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})

    return parsed_and_scored_news

def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Return a plotly bar chart of the mean hourly sentiment for *ticker*."""
    # numeric_only=True is required: the frame still contains the string
    # 'headline' column, which .mean() refuses to aggregate in pandas >= 2.0
    # (previously it was silently dropped).
    mean_scores = parsed_and_scored_news.resample('H').mean(numeric_only=True)

    # Plot a bar chart with plotly
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Hourly Sentiment Scores')
    return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later

def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Return a plotly bar chart of the mean daily sentiment for *ticker*."""
    # numeric_only=True is required: the frame still contains the string
    # 'headline' column, which .mean() refuses to aggregate in pandas >= 2.0
    # (previously it was silently dropped).
    mean_scores = parsed_and_scored_news.resample('D').mean(numeric_only=True)

    # Plot a bar chart with plotly
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
    return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later

# Streamlit app
# Page-level configuration must be the first Streamlit call in the script.
st.set_page_config(page_title = "Stock Trend and News Sentiment Analyzer", layout = "wide")

st.header("Stock Trend & News Sentiment Analyzer")

st.markdown('Enter a stock symbol / ticker, then upload your CSV (from TradingView) with the following columns present: Ticker,Description,Price,Sector. Select the matching sector afterwards and run the analysis.')

# Upper-cased so it matches the Ticker column of the uploaded CSV.
ticker = st.text_input('Enter Stock Ticker', '').upper()

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)

    # Everything below runs inside one broad try: any failure (most commonly
    # an unknown ticker making .values[0] raise IndexError) falls through to
    # the generic usage hint at the bottom.
    try:
        # Display company name, last close, and sector from CSV
        company_name = data.loc[data['Ticker'] == ticker, 'Description'].values[0]
        last_close = data.loc[data['Ticker'] == ticker, 'Price'].values[0]
        csv_sector = data.loc[data['Ticker'] == ticker, 'Sector'].values[0]
        st.write(f"Company: {company_name} ({csv_sector})")
        st.write(f"Last Close: {last_close}")


        # Define a list of all possible sectors and their corresponding ETFs
        all_sectors = ['XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLRE', 'XLK', 'XLU', 'XLC']
        sector_mapping = {
            'XLY': 'Consumer Discretionary',
            'XLP': 'Consumer Staples',
            'XLE': 'Energy',
            'XLF': 'Financials',
            'XLV': 'Health Care',
            'XLI': 'Industrials',
            'XLB': 'Materials',
            'XLRE': 'Real Estate',
            'XLK': 'Technology',
            'XLU': 'Utilities',
            'XLC': 'Communication Services'
        }

        # Get sector from CSV
        # NOTE(review): duplicates the csv_sector lookup above.
        sector = data.loc[data['Ticker'] == ticker, 'Sector'].values[0]

        # If the sector is not found in the list of all sectors, set it to '--Not Found--'
        if sector not in sector_mapping.values():
            sector = '--Not Found--'
        else:
            # Convert the sector to its corresponding ETF
            # (reverse lookup: find the ETF key whose value equals the sector name)
            sector = list(sector_mapping.keys())[list(sector_mapping.values()).index(sector)]

        # Display a dropdown menu for the sector with the current sector selected by default
        sector = st.selectbox('Sector', options=all_sectors, index=all_sectors.index(sector) if sector in all_sectors else 0)

        # Display the sector mapping table
        st.table(pd.DataFrame(list(sector_mapping.items()), columns=['ETF', 'Sector']))


        if st.button('Submit'):
            # Get trends (one month of closing prices for both stock and sector ETF)
            stock_trend = get_trend(ticker)
            sector_trend = get_trend(sector)

            # Normalize trends so the two series are comparable on one axis.
            # Re-using the same scaler is safe: fit_transform re-fits each time.
            scaler = MinMaxScaler()
            stock_trend = pd.DataFrame(scaler.fit_transform(stock_trend.values.reshape(-1,1)), index=stock_trend.index, columns=['Close'])
            sector_trend = pd.DataFrame(scaler.fit_transform(sector_trend.values.reshape(-1,1)), index=sector_trend.index, columns=['Close'])

            # Plot trends
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=stock_trend.index, y=stock_trend['Close'], mode='lines', name=ticker))
            fig.add_trace(go.Scatter(x=sector_trend.index, y=sector_trend['Close'], mode='lines', name=sector))
            fig.update_layout(title='Stock and Sector Trend over the past 30 days', xaxis_title='Date', yaxis_title='Normalized Close Price', autosize=False, width=2000, height=1200)
            st.plotly_chart(fig)


            # Re-download OHLC history (without dividends/splits) for the
            # support/resistance analysis.
            ticker2 = yf.Ticker(ticker)
            df = ticker2.history(period="1mo", actions=False)

            # Creating a list and feeding it the identified support and resistance levels via the Support and Resistance functions
            levels = []
            level_types = []
            # Mean candle range is the proximity threshold for merging levels.
            mean = np.mean(df['High'] - df['Low'])

            # Start/end two bars in so the fractal check can look 2 bars each way.
            for i in range(2, df.shape[0] - 2):

                if is_Suppport_Level(df, i):
                    # NOTE(review): integer lookup on a DatetimeIndex relies on
                    # a deprecated positional fallback (removed in pandas 3.0).
                    level = df['Low'][i].round(2)

                    # Only keep the level if it is not too close to one already found.
                    if distance_from_mean(level, levels, mean):
                        levels.append(level)
                        level_types.append('Support')

                elif is_Resistance_Level(df, i):
                    level = df['High'][i].round(2)

                    if distance_from_mean(level, levels, mean):
                        levels.append(level)
                        level_types.append('Resistance')

            # Plotting the data
            fig2 = go.Figure(data=[go.Candlestick(x=df.index,
                                                open=df['Open'],
                                                high=df['High'],
                                                low=df['Low'],
                                                close=df['Close'])])

            # Draw each detected level as a horizontal line across the chart.
            for level, level_type in zip(levels, level_types):
                fig2.add_trace(go.Scatter(x=df.index, y=[level]*len(df.index), mode='lines', name=level_type))

            fig2.update_layout(title='Support and Resistance levels for ' + ticker,
                    xaxis_title='Date',
                    yaxis_title='Price',
                    autosize=False,
                    width=2000,
                    height=1200)


            st.plotly_chart(fig2)




            # News-sentiment section: scrape FinViz headlines, score with VADER,
            # and chart hourly/daily averages.
            st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
            news_table = get_news(ticker)
            parsed_news_df = parse_news(news_table)
            parsed_and_scored_news = score_news(parsed_news_df)
            fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
            fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker) 

            st.plotly_chart(fig_hourly)
            st.plotly_chart(fig_daily)

            description = """
                The above chart averages the sentiment scores of {} stock hourly and daily.
                The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
                The news headlines are obtained from the FinViz website.
                Sentiments are given by the nltk.sentiment.vader Python library.
                Adapted from https://github.com/damianboh/stock_sentiment_streamlit
                """.format(ticker)
                
            st.write(description)     
            st.table(parsed_and_scored_news)
        
    # NOTE(review): this catch-all prints the error only to the server console
    # and shows users a generic hint, masking the real cause.
    except Exception as e:
        print(str(e))
        st.write("Enter a stock ticker, e.g. 'AAPL' above and hit Enter.")  
else:
    st.write("Please upload a CSV file.")

# CSS snippet that hides Streamlit's default hamburger menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
# unsafe_allow_html is required so the <style> tag is injected rather than escaped.
st.markdown(hide_streamlit_style, unsafe_allow_html=True) 

# streamlit run "c:/Dropbox/BIMWERX/_work/YouTube/elephant/230701-10 - ML Predict/src/stock_sentiment2.py"