Papaflessas's picture
Deploy Signal Generator app
3fe0726
import os
import glob
import pandas as pd
from news_scraper.nlp_models.finbert_tone import FinBertSentimentAnalyzer_Tone
from news_scraper.nlp_models.finbert_prosusAI import FinBertSentimentAnalyzer_ProsusAI
from news_scraper.nlp_models.finbert_finetuned import FinBertSentimentAnalyzer_Finetuned
from news_scraper.nlp_models.sigma import Sigma
from news_scraper.nlp_models.distilroberta import DistilRoBERTa
from news_scraper.nlp_models.finbert_regressor import FinbertRegressor
from news_scraper.nlp_models.finbertTweet import FinBertTweet
from news_scraper.nlp_models.robertalarge import RobertaLarge
def main(logs_dir=None, out_dir=None):
    """Aggregate news-sentiment workbooks and score each headline per model.

    Reads every ``*news_sentiment*.xlsx`` workbook found in ``logs_dir``,
    concatenates them into a single DataFrame, drops the bookkeeping columns,
    adds one column per configured model containing
    ``model.predict_sentiment(headline)``, and writes the result to
    ``model_comarison.xlsx`` inside ``out_dir``.

    Args:
        logs_dir: Directory searched for input workbooks. Defaults to the
            developer-local ``src/logs`` folder that used to be hard-coded.
        out_dir: Directory that receives the output workbook; it is created
            when missing. Defaults to ``logs_dir``.

    Returns:
        None. Prints a message and returns early when no workbook matches.

    Raises:
        KeyError: If the combined data has no ``Headline`` column.
    """
    default_logs_dir = r"C:\Users\M\Desktop\repos\gotti\LLaMAVestor\src\logs"
    logs_dir = os.path.abspath(
        default_logs_dir if logs_dir is None else logs_dir
    )
    out_dir = logs_dir if out_dir is None else os.path.abspath(out_dir)

    # 1) collect all news_sentiment Excel workbooks
    pattern = os.path.join(logs_dir, '*news_sentiment*.xlsx')
    # glob returns matches in arbitrary order; sort so the row order of the
    # aggregated sheet is reproducible between runs.
    files = sorted(glob.glob(pattern))

    # 2) load into single DF
    print(f'Found {len(files)} files to aggregate.')
    if not files:
        print('No files found. Exiting.')
        return

    # The pattern only matches .xlsx files, so every match is an Excel file.
    df = pd.concat((pd.read_excel(path) for path in files), ignore_index=True)

    # 3) drop unused cols (errors='ignore' tolerates workbooks lacking some)
    df = df.drop(columns=[
        'Timestamp', 'NewsID', 'URL', 'Source',
        'Symbols', 'SentimentScore', 'SentimentAnalysis', 'Time to Process',
    ], errors='ignore')

    # Fail before any model is constructed if there is nothing to score.
    if 'Headline' not in df.columns:
        raise KeyError(
            "Aggregated data has no 'Headline' column; "
            f"columns found: {list(df.columns)}"
        )

    # 4) run each model and add its column
    # Build only the analyzers that are actually run: constructing one
    # presumably loads model weights, so unused instances are wasted work.
    # To compare more models, add entries here, e.g. 'Sigma': Sigma().
    models = {
        'RobertaLarge': RobertaLarge(),
    }
    for label, model in models.items():
        df[label] = df['Headline'].apply(model.predict_sentiment)

    # 5) ensure output folder and write to Excel
    os.makedirs(out_dir, exist_ok=True)
    # NOTE: the file name spelling ("comarison") is kept as-is so anything
    # that already reads this workbook keeps working.
    out_path = os.path.join(out_dir, 'model_comarison.xlsx')
    df.to_excel(out_path, index=False)
# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()