Spaces:

chrisvnz
/

trend-sentiment

Runtime error

App Files Files Community

trend-sentiment / app.py

chrisvnz

Update app.py

9070c36 over 2 years ago

raw

history blame contribute delete

13.2 kB

	import plotly.graph_objects as go
	import streamlit as st
	import yfinance as yf
	import pandas as pd
	import numpy as np
	from urllib.request import urlopen, Request
	from bs4 import BeautifulSoup
	import plotly
	import plotly.express as px
	import nltk
	nltk.downloader.download('vader_lexicon')
	from nltk.sentiment.vader import SentimentIntensityAnalyzer
	from datetime import datetime
	from sklearn.preprocessing import MinMaxScaler

	# Base URL of a FinViz quote page; a ticker symbol is appended to it to
	# build the page address that news headlines are scraped from.
	finviz_url = 'https://finviz.com/quote.ashx?t='

	# Lookup table from a sector name to the ticker symbol of an ETF used to
	# represent that sector. Two sector names share the 'XLY' entry.
	sector_etf_mapping = {
	'Consumer Durables': 'XLY',
	'Consumer Discretionary': 'XLY',
	'Consumer Staples': 'XLP',
	'Energy': 'XLE',
	'Financials': 'XLF',
	'Health Care': 'XLV',
	'Industrials': 'XLI',
	'Materials': 'XLB',
	'Real Estate': 'XLRE',
	'Technology': 'XLK',
	'Utilities': 'XLU',
	'Communication Services': 'XLC'
	}


	# Create two functions to calculate if a level is SUPPORT or a RESISTANCE level through fractal identification
	def is_Suppport_Level(df, i):
	    """Tell whether row ``i`` of ``df`` is a fractal support level.

	    A row qualifies when its 'Low' is below both neighbouring lows and the
	    lows keep rising for one more row on each side, i.e.
	    ``Low[i-2] > Low[i-1] > Low[i] < Low[i+1] < Low[i+2]``.

	    Args:
	        df: DataFrame with a 'Low' column.
	        i: Integer *position* of the row to test. Rows ``i - 2`` .. ``i + 2``
	            are read, so callers should keep ``2 <= i <= len(df) - 3``.

	    Returns:
	        bool: True when the five-row pattern holds.
	    """
	    # Access strictly by position: plain ``df['Low'][i]`` treats ``i`` as a
	    # label on integer indexes and relies on a deprecated fallback on
	    # datetime indexes.
	    low = df['Low'].iloc
	    support = (
	        low[i] < low[i - 1]
	        and low[i] < low[i + 1]
	        and low[i + 1] < low[i + 2]
	        and low[i - 1] < low[i - 2]
	    )
	    return bool(support)

	def is_Resistance_Level(df, i):
	    """Tell whether row ``i`` of ``df`` is a fractal resistance level.

	    A row qualifies when its 'High' is above both neighbouring highs and the
	    highs keep falling for one more row on each side, i.e.
	    ``High[i-2] < High[i-1] < High[i] > High[i+1] > High[i+2]``.

	    Args:
	        df: DataFrame with a 'High' column.
	        i: Integer *position* of the row to test. Rows ``i - 2`` .. ``i + 2``
	            are read, so callers should keep ``2 <= i <= len(df) - 3``.

	    Returns:
	        bool: True when the five-row pattern holds.
	    """
	    # Access strictly by position: plain ``df['High'][i]`` treats ``i`` as a
	    # label on integer indexes and relies on a deprecated fallback on
	    # datetime indexes.
	    high = df['High'].iloc
	    resistance = (
	        high[i] > high[i - 1]
	        and high[i] > high[i + 1]
	        and high[i + 1] > high[i + 2]
	        and high[i - 1] > high[i - 2]
	    )
	    return bool(resistance)

	# Given a price level, this function returns True when the level is NOT within `mean` of any previously discovered key level (so it can be kept), and False when it is too near to one of them.
	def distance_from_mean(level, levels, mean):
	    """Check that ``level`` is far enough from every known level.

	    Args:
	        level: Candidate price level.
	        levels: Iterable of price levels already accepted.
	        mean: Minimum separation; a known level closer than this (strictly)
	            counts as "too near".

	    Returns:
	        True when no entry of ``levels`` lies within ``mean`` of ``level``
	        (including when ``levels`` is empty), otherwise False.
	    """
	    too_near_flags = []
	    for known_level in levels:
	        gap = abs(level - known_level)
	        too_near_flags.append(gap < mean)
	    # Zero hits means the candidate is clear of all existing levels.
	    return np.sum(too_near_flags) == 0


	# Function to get the trend of a given stock or ETF
	def get_trend(ticker, period='1mo'):
	    """Fetch the closing-price series of a stock or ETF.

	    Args:
	        ticker: Symbol handed to ``yf.Ticker``.
	        period: Look-back period handed to ``Ticker.history``.

	    Returns:
	        The 'Close' column of the price history returned by yfinance.
	    """
	    return yf.Ticker(ticker).history(period=period)['Close']

	# Function to normalize a pandas series
	def normalize(series):
	    """Min-max scale a pandas Series into the range [0, 1].

	    Args:
	        series: The pandas Series to rescale.

	    Returns:
	        A new Series with the same index holding the scaled values, or the
	        input series itself when it has at most one unique value (scaling
	        is skipped in that case).
	    """
	    distinct_count = len(series.unique())
	    if distinct_count <= 1:
	        # Nothing to spread over a range: hand the input back untouched.
	        return series
	    column = series.values.reshape(-1, 1)
	    rescaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(column)
	    return pd.Series(rescaled.flatten(), index=series.index)


	# Function to get the trend of a given sector or industry
	def _yf_period_to_rule(period):
	    """Translate a yfinance-style period such as '1mo' into a pandas resample rule."""
	    if not isinstance(period, str):
	        return period
	    # Longest suffix first so 'mo' is not mistaken for another unit.
	    for suffix, alias in (('mo', 'MS'), ('wk', 'W'), ('d', 'D'), ('y', 'YS')):
	        count = period[:-len(suffix)]
	        if period.endswith(suffix) and count.isdigit():
	            return count + alias
	    # Anything else is assumed to already be a pandas offset alias.
	    return period

	def get_trend_from_csv(df, column, value, period='1mo'):
	    """Average the numeric columns of one sector/industry per time bucket.

	    Args:
	        df: DataFrame with a datetime-like index (required by ``resample``).
	        column: Name of the column to filter on (e.g. a sector column).
	        value: Value of ``column`` selecting the rows to keep.
	        period: Bucket size. Accepts yfinance-style strings ('5d', '1wk',
	            '1mo', '1y'), which are translated to pandas rules (months and
	            years are bucketed by period start), or any pandas offset alias,
	            which is passed through unchanged.

	    Returns:
	        DataFrame of per-bucket means of the numeric columns of the
	        filtered rows.
	    """
	    # Filter the dataframe by the given sector or industry
	    df_filtered = df[df[column] == value]
	    # The default '1mo' is not a valid pandas frequency, so convert it first.
	    rule = _yf_period_to_rule(period)
	    # numeric_only keeps text columns (such as the filter column itself)
	    # from breaking the mean.
	    trend = df_filtered.resample(rule).mean(numeric_only=True)
	    return trend

	def get_news(ticker):
	    """Download the FinViz quote page of ``ticker`` and return its news table.

	    Args:
	        ticker: Symbol appended to ``finviz_url`` to build the page URL.

	    Returns:
	        The element whose id is 'news-table', or None when the page has no
	        such element.

	    Raises:
	        urllib.error.URLError: When the page cannot be fetched (raised by
	            ``urlopen``).
	    """
	    url = finviz_url + ticker
	    # A browser-like User-Agent is sent with the request.
	    req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
	    # Close the HTTP response deterministically once it has been parsed.
	    with urlopen(req) as response:
	        # Name the parser explicitly so the result does not depend on which
	        # optional parser libraries happen to be installed.
	        html = BeautifulSoup(response, 'html.parser')
	    # Find 'news-table' in the Soup and load it into 'news_table'
	    news_table = html.find(id='news-table')
	    return news_table

	# parse news into dataframe
	def parse_news(news_table):
	    """Turn the scraped news table into a DataFrame of headlines.

	    Each ``tr`` row contributes the text of its first link as the headline
	    and the whitespace-separated tokens of its first cell as the timestamp:
	    one token is a time (the date of the previous row is reused), two or
	    more tokens are a date followed by a time.

	    Rows that cannot be interpreted are skipped: rows without a link or a
	    cell, rows with an empty cell, and time-only rows that appear before
	    any date has been seen.

	    Args:
	        news_table: Parsed element exposing ``find_all('tr')``.

	    Returns:
	        DataFrame with columns 'date', 'time', 'headline' and 'datetime'
	        (the parsed combination of date and time).
	    """
	    parsed_news = []
	    # Most recent date token seen; rows carrying only a time reuse it.
	    date = None
	    for row in news_table.find_all('tr'):
	        link, cell = row.a, row.td
	        if link is None or cell is None:
	            # Poorly formatted row without a headline link or timestamp cell.
	            continue
	        text = link.get_text()
	        date_scrape = cell.text.split()
	        if not date_scrape:
	            continue
	        if len(date_scrape) == 1:
	            time = date_scrape[0]
	        else:
	            date, time = date_scrape[0], date_scrape[1]
	        if date is None:
	            # Time-only row before the first dated row: no date to attach.
	            continue
	        parsed_news.append([date, time, text])
	    # Set column names
	    columns = ['date', 'time', 'headline']
	    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
	    # Create a pandas datetime object from the strings in 'date' and 'time' column
	    parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])
	    return parsed_news_df



	def score_news(parsed_news_df):
	    """Attach VADER sentiment scores to every headline.

	    Args:
	        parsed_news_df: DataFrame with 'headline', 'date', 'time' and
	            'datetime' columns.

	    Returns:
	        DataFrame indexed by 'datetime' holding the headline plus one column
	        per entry of the analyzer's score dictionary, with the 'compound'
	        column renamed to 'sentiment_score'. The 'date' and 'time' columns
	        are dropped.
	    """
	    # Instantiate the sentiment intensity analyzer
	    vader = SentimentIntensityAnalyzer()
	    # One dictionary of polarity scores per headline
	    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
	    # Reuse the input's index so the join below lines rows up even when the
	    # input does not carry a default 0..n-1 index.
	    scores_df = pd.DataFrame(scores, index=parsed_news_df.index)
	    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
	    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
	    # Name the axis by keyword: pandas 2.0 rejects the positional form.
	    parsed_and_scored_news = parsed_and_scored_news.drop(columns=['date', 'time'])
	    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
	    return parsed_and_scored_news

	def plot_hourly_sentiment(parsed_and_scored_news, ticker):
	    """Build a bar chart of the mean sentiment score per hour.

	    Args:
	        parsed_and_scored_news: DataFrame with a datetime index and a
	            'sentiment_score' column.
	        ticker: Symbol used in the chart title.

	    Returns:
	        The plotly figure; it is returned rather than shown so the caller
	        decides how to display it.
	    """
	    # Average per hourly bucket. numeric_only skips the text 'headline'
	    # column, which cannot be averaged; lowercase 'h' is the hour alias that
	    # current pandas releases accept without a deprecation warning.
	    mean_scores = parsed_and_scored_news.resample('h').mean(numeric_only=True)
	    # Plot a bar chart with plotly
	    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Hourly Sentiment Scores')
	    return fig

	def plot_daily_sentiment(parsed_and_scored_news, ticker):
	    """Build a bar chart of the mean sentiment score per day.

	    Args:
	        parsed_and_scored_news: DataFrame with a datetime index and a
	            'sentiment_score' column.
	        ticker: Symbol used in the chart title.

	    Returns:
	        The plotly figure; it is returned rather than shown so the caller
	        decides how to display it.
	    """
	    # Average per daily bucket. numeric_only skips the text 'headline'
	    # column, which cannot be averaged.
	    mean_scores = parsed_and_scored_news.resample('D').mean(numeric_only=True)
	    # Plot a bar chart with plotly
	    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
	    return fig

	# Streamlit app: page configuration, heading, instructions and the two
	# input widgets (ticker text box and CSV uploader) used by the code below.
	st.set_page_config(page_title = "Stock Trend and News Sentiment Analyzer", layout = "wide")

	st.header("Stock Trend & News Sentiment Analyzer")

	st.markdown('Enter a stock symbol / ticker, then upload your CSV (from TradingView) with the following columns present: Ticker,Description,Price,Sector. Select the matching sector afterwards and run the analysis.')

	# The entered symbol is upper-cased before use; it defaults to an empty string.
	ticker = st.text_input('Enter Stock Ticker', '').upper()

	# Only files of type "csv" are offered; the value is None until a file is chosen.
	uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
	# Main flow: look the ticker up in the uploaded CSV, let the user confirm
	# the sector ETF, then on 'Submit' draw the trend comparison, the
	# support/resistance chart and the news-sentiment charts.
	if uploaded_file is not None:
	    data = pd.read_csv(uploaded_file)

	    try:
	        # Select the ticker's rows once. ``.values[0]`` raises IndexError
	        # when the ticker is not in the CSV (or is still empty).
	        ticker_rows = data.loc[data['Ticker'] == ticker]
	        company_name = ticker_rows['Description'].values[0]
	        last_close = ticker_rows['Price'].values[0]
	        csv_sector = ticker_rows['Sector'].values[0]
	    except IndexError:
	        st.write("Enter a stock ticker, e.g. 'AAPL' above and hit Enter.")
	    except KeyError as e:
	        # A required column is absent from the uploaded file.
	        st.error(f"The uploaded CSV is missing a required column: {e}")
	    else:
	        # Display company name, last close, and sector from CSV
	        st.write(f"Company: {company_name} ({csv_sector})")
	        st.write(f"Last Close: {last_close}")

	        # Sector ETFs offered in the dropdown and the sector each one stands for
	        sector_mapping = {
	            'XLY': 'Consumer Discretionary',
	            'XLP': 'Consumer Staples',
	            'XLE': 'Energy',
	            'XLF': 'Financials',
	            'XLV': 'Health Care',
	            'XLI': 'Industrials',
	            'XLB': 'Materials',
	            'XLRE': 'Real Estate',
	            'XLK': 'Technology',
	            'XLU': 'Utilities',
	            'XLC': 'Communication Services'
	        }
	        all_sectors = list(sector_mapping)

	        # Pre-select the ETF matching the CSV sector; fall back to the first
	        # entry when the CSV sector is not one of the known names.
	        etf_by_sector = {name: etf for etf, name in sector_mapping.items()}
	        default_etf = etf_by_sector.get(csv_sector)
	        default_index = all_sectors.index(default_etf) if default_etf is not None else 0
	        sector = st.selectbox('Sector', options=all_sectors, index=default_index)

	        # Display the sector mapping table
	        st.table(pd.DataFrame(list(sector_mapping.items()), columns=['ETF', 'Sector']))

	        if st.button('Submit'):
	            try:
	                # Get trends
	                stock_trend = get_trend(ticker)
	                sector_trend = get_trend(sector)

	                # Normalize trends so both series share a 0..1 scale
	                scaler = MinMaxScaler()
	                stock_trend = pd.DataFrame(scaler.fit_transform(stock_trend.values.reshape(-1,1)), index=stock_trend.index, columns=['Close'])
	                sector_trend = pd.DataFrame(scaler.fit_transform(sector_trend.values.reshape(-1,1)), index=sector_trend.index, columns=['Close'])

	                # Plot trends
	                fig = go.Figure()
	                fig.add_trace(go.Scatter(x=stock_trend.index, y=stock_trend['Close'], mode='lines', name=ticker))
	                fig.add_trace(go.Scatter(x=sector_trend.index, y=sector_trend['Close'], mode='lines', name=sector))
	                fig.update_layout(title='Stock and Sector Trend over the past 30 days', xaxis_title='Date', yaxis_title='Normalized Close Price', autosize=False, width=2000, height=1200)
	                st.plotly_chart(fig)

	                ticker2 = yf.Ticker(ticker)
	                df = ticker2.history(period="1mo", actions=False)

	                # Collect support and resistance levels. A new level is kept
	                # only when it is at least one average bar range (High - Low)
	                # away from every level already kept.
	                levels = []
	                level_types = []
	                mean = np.mean(df['High'] - df['Low'])

	                # The fractal tests read two rows on each side of row i.
	                for i in range(2, df.shape[0] - 2):
	                    if is_Suppport_Level(df, i):
	                        # Positional access; the frame is indexed by date.
	                        level = df['Low'].iloc[i].round(2)
	                        if distance_from_mean(level, levels, mean):
	                            levels.append(level)
	                            level_types.append('Support')
	                    elif is_Resistance_Level(df, i):
	                        level = df['High'].iloc[i].round(2)
	                        if distance_from_mean(level, levels, mean):
	                            levels.append(level)
	                            level_types.append('Resistance')

	                # Plotting the data
	                fig2 = go.Figure(data=[go.Candlestick(x=df.index,
	                                                      open=df['Open'],
	                                                      high=df['High'],
	                                                      low=df['Low'],
	                                                      close=df['Close'])])

	                # One horizontal line per detected level
	                for level, level_type in zip(levels, level_types):
	                    fig2.add_trace(go.Scatter(x=df.index, y=[level]*len(df.index), mode='lines', name=level_type))

	                fig2.update_layout(title='Support and Resistance levels for ' + ticker,
	                                   xaxis_title='Date',
	                                   yaxis_title='Price',
	                                   autosize=False,
	                                   width=2000,
	                                   height=1200)

	                st.plotly_chart(fig2)

	                st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
	                news_table = get_news(ticker)
	                parsed_news_df = parse_news(news_table)
	                parsed_and_scored_news = score_news(parsed_news_df)
	                fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
	                fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

	                st.plotly_chart(fig_hourly)
	                st.plotly_chart(fig_daily)

	                description = """
	The above chart averages the sentiment scores of {} stock hourly and daily.
	The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
	The news headlines are obtained from the FinViz website.
	Sentiments are given by the nltk.sentiment.vader Python library.
	Adapted from https://github.com/damianboh/stock_sentiment_streamlit
	""".format(ticker)

	                st.write(description)
	                st.table(parsed_and_scored_news)

	            except Exception as e:
	                # Top-level boundary for the analysis: keep the console log
	                # and show the real cause instead of a generic prompt.
	                print(str(e))
	                st.error(f"Analysis failed for {ticker}: {e}")
	else:
	    st.write("Please upload a CSV file.")

	# CSS injected into the page: sets the '#MainMenu' element and the
	# 'footer' element to hidden.
	hide_streamlit_style = """
	<style>
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	</style>
	"""
	# unsafe_allow_html is required so the <style> tag is emitted as HTML
	# rather than shown as text.
	st.markdown(hide_streamlit_style, unsafe_allow_html=True)

	# streamlit run "c:/Dropbox/BIMWERX/_work/YouTube/elephant/230701-10 - ML Predict/src/stock_sentiment2.py"