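"""Aggregate the *news_sentiment* spreadsheet exports found in the logs folder,
score every headline with a set of sentiment models, and write the combined
table to a single Excel workbook (model_comparison.xlsx) for side-by-side
comparison of the models' outputs."""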
import os
import glob
import pandas as pd
from news_scraper.nlp_models.finbert_tone import FinBertSentimentAnalyzer_Tone
from news_scraper.nlp_models.finbert_prosusAI import FinBertSentimentAnalyzer_ProsusAI
from news_scraper.nlp_models.finbert_finetuned import FinBertSentimentAnalyzer_Finetuned
from news_scraper.nlp_models.sigma import Sigma
from news_scraper.nlp_models.distilroberta import DistilRoBERTa
from news_scraper.nlp_models.finbert_regressor import FinbertRegressor
from news_scraper.nlp_models.finbertTweet import FinBertTweet
from news_scraper.nlp_models.robertalarge import RobertaLarge
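
# The model wrappers imported above are assumed to expose a predict_sentiment(text)
# method returning a score or label; that is the only interface main() relies on.
# This optional stub (not part of the original pipeline) implements the same
# interface, so the aggregation and Excel output can be dry-run without loading
# any transformer weights, e.g. models = {'Stub': ConstantSentimentStub()}.
class ConstantSentimentStub:
    """Placeholder analyzer that scores every headline as neutral."""

    def predict_sentiment(self, text: str) -> float:
        return 0.0
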

def main():
    # 1) collect all news_sentiment spreadsheets (Excel or CSV) from the logs folder;
    #    the paths below are machine-specific and should be adjusted for other setups
    logs_dir = os.path.abspath(r"C:\Users\M\Desktop\repos\gotti\LLaMAVestor\src\logs")
    out_dir = os.path.abspath(r"C:\Users\M\Desktop\repos\gotti\LLaMAVestor\src\logs")
    patterns = [os.path.join(logs_dir, '*news_sentiment*.xlsx'),
                os.path.join(logs_dir, '*news_sentiment*.csv')]
    files = sorted(f for p in patterns for f in glob.glob(p))
    # 2) load into single DF
    print(f'Found {len(files)} files to aggregate.')
    if len(files) == 0:
        print('No files found. Exiting.')
        return
    df = pd.concat(
        (pd.read_excel(f) if f.lower().endswith('.xlsx') else pd.read_csv(f)
         for f in files),
        ignore_index=True
    )
    # 3) drop unused cols
    df = df.drop(columns=[
        'Timestamp','NewsID','URL','Source',
        'Symbols','SentimentScore','SentimentAnalysis','Time to Process'
    ], errors='ignore')
    # 4) run each sentiment model and add its predictions as a new column
    models = {
        'Finetuned': FinBertSentimentAnalyzer_Finetuned(),
        'Sigma': Sigma(),
        'DistilRoBERTa': DistilRoBERTa(),
        'FinbertRegressor': FinbertRegressor(),
        'RobertaLarge': RobertaLarge()
    }
    for label, model in models.items():
        df[label] = df['Headline'].apply(model.predict_sentiment)
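    # The loop above assumes every wrapper accepts a single headline string, since
    # .apply() feeds rows one at a time; a batched interface, if any wrapper offers
    # one (not shown here), could speed this up on large logs but is not relied on.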
    # 5) ensure output folder and write to Excel
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, 'model_comparison.xlsx')
    df.to_excel(out_path, index=False)

if __name__ == '__main__':
    main()