## Variables
import os
import pathlib
import streamlit as st
import pandas as pd
import numpy as np
import plotly_express as px
import plotly.graph_objects as go
import tweepy
from datetime import datetime as dt
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain import VectorDBQA
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage,
)
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer
from optimum.pipelines import pipeline
from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode
from datasets import Dataset
from huggingface_hub import Repository
def load_models():
    '''Load the sentiment and topic classification pipelines.'''
    sent_pipe = pipeline(task, model=sent_model_id, tokenizer=sent_model_id)
    topic_pipe = pipeline(task, model=topic_model_id, tokenizer=topic_model_id)
    return sent_pipe, topic_pipe
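# Illustrative usage (a sketch, not part of the original file): the two pipelines
# returned here become `sentiment_classifier` and `topic_classifier` further down
# and can be called directly on a list of tweet strings, e.g.
#   sent_pipe, topic_pipe = load_models()
#   sent_pipe(["Fed signals further rate hikes"])  # -> [{'label': ..., 'score': ...}]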
def process_tweets(df, df_users):
    '''Process tweets into a dataframe.'''
    # Join tweets with user metadata on the author id
    df['author'] = df['author'].astype(np.int64)
    df_merged = df.merge(df_users, on='author')
    tweet_list = df_merged['tweet'].tolist()

    # Run the sentiment and topic classifiers over all tweets
    sentiment = pd.DataFrame(sentiment_classifier(tweet_list))
    topic = pd.DataFrame(topic_classifier(tweet_list))
    sentiment.rename(columns={'score': 'sentiment_confidence', 'label': 'sentiment'}, inplace=True)
    topic.rename(columns={'score': 'topic_confidence', 'label': 'topic'}, inplace=True)

    # Combine predictions with tweet metadata and express confidences as percentages
    df_group = pd.concat([df_merged, sentiment, topic], axis=1)
    df_group[['sentiment_confidence', 'topic_confidence']] = df_group[
        ['sentiment_confidence', 'topic_confidence']].round(2).mul(100)
    df_tweets = df_group[['creation_time', 'username', 'tweet', 'sentiment', 'topic',
                          'sentiment_confidence', 'topic_confidence']]
    df_tweets = df_tweets.sort_values(by=['creation_time'], ascending=False)
    return df_tweets
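# Illustrative sketch (an assumption, not part of the original file): if the raw
# pipeline labels are ids rather than names, they can be mapped to the readable
# names in the `sentiments` and `topics` dicts defined below; the exact raw label
# format ("0" vs. "LABEL_0") depends on the model config, so adjust the key lookup.
#   df_tweets['sentiment'] = df_tweets['sentiment'].map(
#       lambda lab: sentiments.get(str(lab).split('_')[-1], lab))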
def embed_tweets(file, model, query, prompt):
    '''Process the file with the latest tweets and answer a query over them.'''
    # Split tweets into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_text(file)

    # Pick the embedding model
    if model == "hkunlp/instructor-large":
        emb = HuggingFaceInstructEmbeddings(
            model_name=model,
            query_instruction='Represent the Financial question for retrieving supporting documents: ',
            embed_instruction='Represent the Financial document for retrieval: ',
        )
    elif model == "sentence-transformers/all-mpnet-base-v2":
        emb = HuggingFaceEmbeddings(model_name=model)

    # Build a FAISS index over the chunks and run a retrieval QA chain
    docsearch = FAISS.from_texts(texts, emb)
    chain_type_kwargs = {"prompt": prompt}
    chain = VectorDBQA.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=docsearch,
        chain_type_kwargs=chain_type_kwargs,
    )
    result = chain({"query": query})
    return result
CONFIG = {
    "bearer_token": os.environ.get("bearer_token"),
}

sent_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-fintwitter-classification'
topic_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-finance-topic-classification'
task = 'text-classification'

sentiments = {"0": "Bearish", "1": "Bullish", "2": "Neutral"}

topics = {
    "0": "Analyst Update",
    "1": "Fed | Central Banks",
    "2": "Company | Product News",
    "3": "Treasuries | Corporate Debt",
    "4": "Dividend",
    "5": "Earnings",
    "6": "Energy | Oil",
    "7": "Financials",
    "8": "Currencies",
    "9": "General News | Opinion",
    "10": "Gold | Metals | Materials",
    "11": "IPO",
    "12": "Legal | Regulation",
    "13": "M&A | Investments",
    "14": "Macro",
    "15": "Markets",
    "16": "Politics",
    "17": "Personnel Change",
    "18": "Stock Commentary",
    "19": "Stock Movement",
}
user_name = [
    "Investing.com",
    "(((The Daily Shot)))",
    "Bloomberg Markets",
    "FirstSquawk",
    "MarketWatch",
    "markets",
    "FinancialTimes",
    "CNBC",
    "ReutersBiz",
    "BreakingNews",
    "LiveSquawk",
    "NYSE",
    "WSJmarkets",
    "FT",
    "TheStreet",
    "ftfinancenews",
    "BloombergTV",
    "Nasdaq",
    "NYSE",
    "federalreserve",
    "NewYorkFed",
    "sffed",
    "WSJCentralBanks",
    "RichmondFed",
    "ecb",
    "stlouisfed",
    "WorldBank",
    "MarketCurrents",
    "OpenOutcrier",
    "BullTradeFinder",
    "WallStChatter",
    "Briefingcom",
    "SeekingAlpha",
    "realDonaldTrump",
    "AswathDamodaran",
    "ukarlewitz",
    "alphatrends",
    "Investor666",
    "ACInvestorBlog",
    "ZorTrades",
    "ScottNations",
    "TradersCorner",
    "TraderGoalieOne",
    "option_snipper",
    "jasonleavitt",
    "LMT978",
    "OptionsHawk",
    "andrewbtodd",
    "Terri1618",
    "SunriseTrader",
    "traderstewie",
    "TMLTrader",
    "IncredibleTrade",
    "NYFedResearch",
    "YahooFinance",
    "business",
    "economics",
    "IMFNews",
    "Market_Screener",
    "QuickTake",
    "NewsFromBW",
    "BNCommodities",
]
user_id = [
    "988955288",
    "423769635",
    "69620713",
    "59393368",
    "3295423333",
    "624413",
    "69620713",
    "4898091",
    "20402945",
    "15110357",
    "6017542",
    "21323268",
    "28164923",
    "18949452",
    "15281391",
    "11014272",
    "35002876",
    "18639734",
    "21323268",
    "26538229",
    "15072071",
    "117237387",
    "327484803",
    "16532451",
    "83466368",
    "71567590",
    "27860681",
    "15296897",
    "2334614718",
    "2222635612",
    "3382363841",
    "72928001",
    "23059499",
    "25073877",
    "33216611",
    "37284991",
    "15246621",
    "293458690",
    "55561590",
    "18560146",
    "244978426",
    "85523269",
    "276714687",
    "2806294664",
    "16205561",
    "1064700308",
    "61342056",
    "184126162",
    "405820375",
    "787439438964068352",
    "52166809",
    "2715646770",
    "47247213",
    "374672240",
    "19546277",
    "34713362",
    "144274618",
    "25098482",
    "102325185",
    "252751061",
    "976297820532518914",
    "804556370",
]
sentiment_classifier, topic_classifier = load_models()
def convert_user_names(user_name: list):
    '''Convert user_names to tweepy query format.'''
    users = []
    for user in user_name:
        users.append(f"from:{user}")
    return " OR ".join(users)