"""Aggregate news-sentiment logs and score every headline with NLP models.

The script collects the ``*news_sentiment*.xlsx`` files from a logs folder,
concatenates them, drops the columns that are not needed for the comparison,
adds one column per enabled sentiment model and writes the result to Excel.
"""
import glob
import os

import pandas as pd

from news_scraper.nlp_models.finbert_tone import FinBertSentimentAnalyzer_Tone
from news_scraper.nlp_models.finbert_prosusAI import FinBertSentimentAnalyzer_ProsusAI
from news_scraper.nlp_models.finbert_finetuned import FinBertSentimentAnalyzer_Finetuned
from news_scraper.nlp_models.sigma import Sigma
from news_scraper.nlp_models.distilroberta import DistilRoBERTa
from news_scraper.nlp_models.finbert_regressor import FinbertRegressor
from news_scraper.nlp_models.finbertTweet import FinBertTweet
from news_scraper.nlp_models.robertalarge import RobertaLarge

# Default input/output locations (both point at the same logs folder).
DEFAULT_LOGS_DIR = r"C:\Users\M\Desktop\repos\gotti\LLaMAVestor\src\logs"
DEFAULT_OUT_DIR = r"C:\Users\M\Desktop\repos\gotti\LLaMAVestor\src\logs"

# Columns of the aggregated logs that are dropped before scoring.
UNUSED_COLUMNS = [
    'Timestamp', 'NewsID', 'URL', 'Source',
    'Symbols', 'SentimentScore', 'SentimentAnalysis', 'Time to Process',
]

# Output column label -> model class.  Classes (not instances) are stored so
# that a model is only constructed when it is actually selected for a run.
MODEL_FACTORIES = {
    'Finetuned': FinBertSentimentAnalyzer_Finetuned,
    'Sigma': Sigma,
    'DistilRoBERTa': DistilRoBERTa,
    'FinbertRegressor': FinbertRegressor,
    'RobertaLarge': RobertaLarge,
}

# Models that run by default.  Only RobertaLarge was effectively active
# before (the full model dict used to be built and then overwritten).
DEFAULT_MODEL_LABELS = ('RobertaLarge',)


def _read_table(path):
    """Load one log file as a DataFrame, choosing the reader by extension."""
    if path.lower().endswith('.xlsx'):
        return pd.read_excel(path)
    return pd.read_csv(path)


def main(*, logs_dir=DEFAULT_LOGS_DIR, out_dir=DEFAULT_OUT_DIR,
         model_labels=DEFAULT_MODEL_LABELS):
    """Build the model-comparison spreadsheet from the news-sentiment logs.

    Args:
        logs_dir: Folder searched for ``*news_sentiment*.xlsx`` files.
        out_dir: Folder the resulting workbook is written to; created if it
            does not exist.
        model_labels: Keys of ``MODEL_FACTORIES`` naming the models to run.
            Each label becomes a column in the output.

    Raises:
        ValueError: If ``model_labels`` contains a label that is not a key
            of ``MODEL_FACTORIES``.
        KeyError: If the aggregated data has no ``Headline`` column.
    """
    # Fail fast on a bad label, before any file is read or model is built.
    unknown = [label for label in model_labels if label not in MODEL_FACTORIES]
    if unknown:
        raise ValueError(f'Unknown model labels: {unknown}')

    logs_dir = os.path.abspath(logs_dir)
    out_dir = os.path.abspath(out_dir)

    # 1) collect all news_sentiment workbooks
    pattern = os.path.join(logs_dir, '*news_sentiment*.xlsx')
    files = glob.glob(pattern)

    # 2) load into a single DataFrame
    print(f'Found {len(files)} files to aggregate.')
    if not files:
        print('No files found. Exiting.')
        return

    df = pd.concat((_read_table(path) for path in files), ignore_index=True)

    # 3) drop unused columns; errors='ignore' tolerates logs lacking some
    df = df.drop(columns=UNUSED_COLUMNS, errors='ignore')

    if 'Headline' not in df.columns:
        raise KeyError("Aggregated logs have no 'Headline' column to score.")

    # 4) run each selected model on the headlines and add its column
    for label in model_labels:
        model = MODEL_FACTORIES[label]()
        df[label] = df['Headline'].apply(model.predict_sentiment)

    # 5) ensure the output folder exists and write to Excel
    os.makedirs(out_dir, exist_ok=True)
    # NOTE(review): 'comarison' looks like a typo for 'comparison'; the name
    # is left unchanged because other tooling may rely on it - confirm.
    out_path = os.path.join(out_dir, 'model_comarison.xlsx')
    df.to_excel(out_path, index=False)


if __name__ == '__main__':
    main()