Spaces:
Running
Running
| import os | |
| import glob | |
| import pandas as pd | |
| from news_scraper.nlp_models.finbert_tone import FinBertSentimentAnalyzer_Tone | |
| from news_scraper.nlp_models.finbert_prosusAI import FinBertSentimentAnalyzer_ProsusAI | |
| from news_scraper.nlp_models.finbert_finetuned import FinBertSentimentAnalyzer_Finetuned | |
| from news_scraper.nlp_models.sigma import Sigma | |
| from news_scraper.nlp_models.distilroberta import DistilRoBERTa | |
| from news_scraper.nlp_models.finbert_regressor import FinbertRegressor | |
| from news_scraper.nlp_models.finbertTweet import FinBertTweet | |
| from news_scraper.nlp_models.robertalarge import RobertaLarge | |
# Default folder holding the scraper's news_sentiment workbooks; the comparison
# workbook is written to the same folder unless out_dir is overridden.
_DEFAULT_LOGS_DIR = r"C:\Users\M\Desktop\repos\gotti\LLaMAVestor\src\logs"


def main(logs_dir=_DEFAULT_LOGS_DIR, out_dir=_DEFAULT_LOGS_DIR):
    """Aggregate news-sentiment workbooks and add one prediction column per model.

    Every ``*news_sentiment*.xlsx`` file in ``logs_dir`` is loaded and
    concatenated into a single DataFrame, bookkeeping columns are dropped,
    each configured model's ``predict_sentiment`` is applied to the
    ``Headline`` column, and the result is written to
    ``model_comarison.xlsx`` inside ``out_dir``.

    Args:
        logs_dir: Directory searched for the input workbooks.
        out_dir: Directory the comparison workbook is written to; created
            if it does not exist.

    Returns:
        None. If no input files are found, a message is printed and nothing
        is written.

    Raises:
        KeyError: If the aggregated data has no ``Headline`` column.
    """
    logs_dir = os.path.abspath(logs_dir)
    out_dir = os.path.abspath(out_dir)

    # 1) collect all news_sentiment workbooks; glob order is arbitrary, so
    #    sort to keep the row order of the output reproducible between runs
    pattern = os.path.join(logs_dir, '*news_sentiment*.xlsx')
    files = sorted(glob.glob(pattern))

    # 2) load into single DF
    print(f'Found {len(files)} files to aggregate.')
    if not files:
        print('No files found. Exiting.')
        return

    # The pattern above only matches .xlsx; the CSV branch is kept so the
    # loader keeps working if the pattern is ever widened.
    frames = [
        pd.read_excel(path) if path.lower().endswith('.xlsx') else pd.read_csv(path)
        for path in files
    ]
    df = pd.concat(frames, ignore_index=True)

    # 3) drop unused cols (errors='ignore' tolerates files lacking some of them)
    unused_columns = [
        'Timestamp', 'NewsID', 'URL', 'Source',
        'Symbols', 'SentimentScore', 'SentimentAnalysis', 'Time to Process',
    ]
    df = df.drop(columns=unused_columns, errors='ignore')

    # Fail before any model is constructed if there is nothing to score.
    if 'Headline' not in df.columns:
        raise KeyError("'Headline' column not found in the aggregated news_sentiment data")

    # 4) run each model and add its column.
    #    Only RobertaLarge is active: the previous code built several other
    #    analyzers and then discarded them by rebinding `models`. Add further
    #    label -> analyzer entries here to compare more models.
    models = {
        'RobertaLarge': RobertaLarge(),
    }
    for label, model in models.items():
        df[label] = df['Headline'].apply(model.predict_sentiment)

    # 5) ensure output folder and write to Excel
    #    NOTE(review): the file name is misspelled ("comarison"); it is kept
    #    as-is because other tooling may look for this exact name.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, 'model_comarison.xlsx')
    df.to_excel(out_path, index=False)
# Script entry point: run the aggregation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()