Spaces:
Build error
Build error
| ##Variables | |
| import os | |
| import streamlit as st | |
| import pathlib | |
| from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chat_models.openai import ChatOpenAI | |
| from langchain.callbacks.base import CallbackManager | |
| from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
| from langchain import VectorDBQA | |
| import pandas as pd | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.prompts.chat import ( | |
| ChatPromptTemplate, | |
| SystemMessagePromptTemplate, | |
| AIMessagePromptTemplate, | |
| HumanMessagePromptTemplate, | |
| ) | |
| from langchain.schema import ( | |
| AIMessage, | |
| HumanMessage, | |
| SystemMessage | |
| ) | |
| from optimum.onnxruntime import ORTModelForSequenceClassification | |
| from transformers import pipeline, AutoTokenizer | |
| from optimum.pipelines import pipeline | |
| import tweepy | |
| import pandas as pd | |
| import numpy as np | |
| import plotly_express as px | |
| import plotly.graph_objects as go | |
| from datetime import datetime as dt | |
| from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode | |
| from datasets import Dataset | |
| from huggingface_hub import Repository | |
def load_models():
    """Build the sentiment and topic text-classification pipelines.

    Both pipelines are created with the module-level ``task``. Each one uses
    its own model id (``sent_model_id`` / ``topic_model_id``) for the model
    and for the tokenizer alike.

    Returns:
        A ``(sentiment_pipeline, topic_pipeline)`` tuple.
    """
    # Construction order matches the tuple order: sentiment first, then topic.
    sentiment_pipeline, topic_pipeline = (
        pipeline(task, model=model_id, tokenizer=model_id)
        for model_id in (sent_model_id, topic_model_id)
    )
    return sentiment_pipeline, topic_pipeline
def process_tweets(df, df_users):
    """Classify tweets and return a display-ready frame, newest first.

    Args:
        df: Tweet frame. Its ``author`` column is cast to ``int64`` for the
            join. ``tweet`` and ``creation_time`` must exist after the merge;
            presumably they come from this frame -- confirm against the
            caller.
        df_users: User frame joined on ``author``. Presumably it supplies
            ``username`` -- confirm against the caller.

    Returns:
        DataFrame with the columns ``creation_time``, ``username``,
        ``tweet``, ``sentiment``, ``topic``, ``sentiment_confidence`` and
        ``topic_confidence``, sorted by ``creation_time`` descending. The
        confidence columns are whole-number percentages. When the merge
        matches no rows, an empty frame with the same columns is returned and
        the classifiers are not called.
    """
    output_columns = [
        'creation_time',
        'username',
        'tweet',
        'sentiment',
        'topic',
        'sentiment_confidence',
        'topic_confidence',
    ]
    confidence_columns = ['sentiment_confidence', 'topic_confidence']

    # Cast on a copy so the caller's frame is not modified as a side effect.
    tweets = df.assign(author=df['author'].astype(np.int64))
    df_merged = tweets.merge(df_users, on='author')

    # Nothing to classify: without this guard the column selection below
    # fails with a KeyError because the classifier frames have no columns.
    if df_merged.empty:
        return df_merged.reindex(columns=output_columns)

    tweet_list = df_merged['tweet'].tolist()
    sentiment = pd.DataFrame(sentiment_classifier(tweet_list)).rename(
        columns={'score': 'sentiment_confidence', 'label': 'sentiment'}
    )
    topic = pd.DataFrame(topic_classifier(tweet_list)).rename(
        columns={'score': 'topic_confidence', 'label': 'topic'}
    )

    # A column-on-column merge yields a fresh 0..n-1 index, which lines up
    # with the positional index of the two classifier frames.
    df_group = pd.concat([df_merged, sentiment, topic], axis=1)

    # Scale first, then round: rounding before multiplying leaves float
    # artefacts such as 0.57 * 100 == 56.99999999999999.
    df_group[confidence_columns] = df_group[confidence_columns].mul(100).round()

    return df_group[output_columns].sort_values(
        by=['creation_time'], ascending=False
    )
def create_vectorstore(texts, model, username, topic, creation_time):
    """Embed ``texts`` and index them in a FAISS vector store.

    Args:
        texts: Texts to embed and index.
        model: Embedding model name. Supported values are
            ``"hkunlp/instructor-large"`` (instruction-tuned embeddings with
            finance-specific query/document instructions) and
            ``"sentence-transformers/all-mpnet-base-v2"``.
        username: Iterable of values stored under the ``source`` metadata key.
        topic: Iterable of values stored under the ``topic`` metadata key.
        creation_time: Iterable of values stored under the
            ``extraction_time`` metadata key.

    Returns:
        The store produced by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
            Previously this surfaced as an ``UnboundLocalError`` on ``emb``.
    """
    if model == "hkunlp/instructor-large":
        emb = HuggingFaceInstructEmbeddings(
            model_name=model,
            query_instruction='Represent the Financial question for retrieving supporting documents: ',
            embed_instruction='Represent the Financial document for retrieval: ',
        )
    elif model == "sentence-transformers/all-mpnet-base-v2":
        emb = HuggingFaceEmbeddings(model_name=model)
    else:
        raise ValueError(
            f"Unsupported embedding model {model!r}; expected "
            "'hkunlp/instructor-large' or "
            "'sentence-transformers/all-mpnet-base-v2'"
        )

    # One metadata dict per (username, topic, creation_time) triple; zip
    # stops at the shortest of the three inputs.
    metadatas = [
        {"source": user, "topic": top, "extraction_time": tme}
        for user, top, tme in zip(username, topic, creation_time)
    ]
    docsearch = FAISS.from_texts(texts, emb, metadatas=metadatas)
    return docsearch
def embed_tweets(query, _prompt, _docsearch):
    """Answer ``query`` against a vector store using a "stuff" QA chain.

    Args:
        query: Question text, passed to the chain under the ``"query"`` key.
        _prompt: Prompt handed to the chain via ``chain_type_kwargs``.
        _docsearch: Vector store the chain retrieves from (``k=5``).
            NOTE(review): the leading underscores on ``_prompt`` and
            ``_docsearch`` look like the Streamlit convention for excluding
            arguments from cache hashing -- confirm against the decorator, if
            any, applied outside this view.

    Returns:
        Whatever the chain call returns. ``return_source_documents=True`` is
        requested when the chain is built.
    """
    # A streaming ChatOpenAI instance used to be constructed here but was
    # never passed to the chain; the chain has always run on the plain
    # temperature-0 model below, so the unused client was removed.
    chain = VectorDBQA.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=_docsearch,
        chain_type_kwargs={"prompt": _prompt},
        return_source_documents=True,
        k=5,
    )
    return chain({"query": query})
# Bearer token read from the environment (None when the variable is unset);
# presumably the Twitter API credential used with tweepy -- confirm at the
# point of use.
CONFIG = {"bearer_token": os.getenv("bearer_token")}

# Pipeline task and model ids consumed by load_models().
task = 'text-classification'
sent_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-fintwitter-classification'
topic_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-finance-topic-classification'

# Label tables keyed by the stringified position ("0", "1", ...) of each name
# in the tuples below.
_SENTIMENT_NAMES = ("Bearish", "Bullish", "Neutral")
_TOPIC_NAMES = (
    "Analyst Update",
    "Fed | Central Banks",
    "Company | Product News",
    "Treasuries | Corporate Debt",
    "Dividend",
    "Earnings",
    "Energy | Oil",
    "Financials",
    "Currencies",
    "General News | Opinion",
    "Gold | Metals | Materials",
    "IPO",
    "Legal | Regulation",
    "M&A | Investments",
    "Macro",
    "Markets",
    "Politics",
    "Personnel Change",
    "Stock Commentary",
    "Stock Movement",
)
sentiments = {str(idx): name for idx, name in enumerate(_SENTIMENT_NAMES)}
topics = {str(idx): name for idx, name in enumerate(_TOPIC_NAMES)}
# Build both pipelines once at import time; process_tweets() reads these two
# module-level names.
(sentiment_classifier, topic_classifier) = load_models()
def convert_user_names(user_name: list):
    """Join user names into a ``from:<name> OR from:<name>`` query string.

    Each entry of ``user_name`` is prefixed with ``from:`` and the results are
    joined with ``" OR "``. An empty list yields an empty string.
    """
    return " OR ".join(f"from:{handle}" for handle in user_name)