# -*- coding: utf-8 -*-
# Author: Qinghua Liu <liu.11085@osu.edu>
# License: Apache-2.0 License

import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Select the CUDA device before torch initializes its CUDA context
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import argparse
import pickle
import random

import numpy as np
import pandas as pd
import torch

from evaluation.metrics import get_metrics
from utils.slidingWindows import find_length_rank
from model_wrapper import *
from HP_list import Optimal_Uni_algo_HP_dict, Optimal_Multi_algo_HP_dict

# Seeding for reproducibility
seed = 2024
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

print("CUDA Available: ", torch.cuda.is_available())
print("cuDNN Version: ", torch.backends.cudnn.version())
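
# Note: the cuDNN flags above cover most kernels; for stricter reproducibility
# one could additionally call torch.use_deterministic_algorithms(True)
# (an optional hardening, not part of the original setup), at some speed cost.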


def get_result(filename):
    """Load the anomaly scores previously saved alongside `filename` (a .csv)."""
    pickle_filename = filename.replace('.csv', '_results.pkl')
    with open(pickle_filename, 'rb') as f:
        df = pickle.load(f)
    return df['anomaly_score'].to_numpy()
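
# Usage sketch (hypothetical path): reload scores that the loop below saved, e.g.
#   scores = get_result('Uni_Time_RCD/some_dataset.csv')
# which reads 'Uni_Time_RCD/some_dataset_results.pkl' and returns the score array.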

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Running TSB-AD')
    parser.add_argument('--mode', type=str, default='uni', choices=['uni', 'multi'],
                        help='Encoder mode: uni for univariate, multi for multivariate')
    parser.add_argument('--AD_Name', type=str, default='Time_RCD')
    parser.add_argument('--filename', type=str, default='')
    parser.add_argument('--data_direc', type=str, default='')
    # argparse's type=bool would treat any non-empty string (even "False") as True,
    # so parse the flag value explicitly
    parser.add_argument('--save', type=lambda s: s.lower() in ('true', '1', 'yes'),
                        default=True)
    args = parser.parse_args()
    Multi = args.mode == 'multi'
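    # Example invocation (hypothetical script name; run from the repository root
    # so the relative datasets/ paths below resolve):
    #   python run_benchmark.py --mode multi --AD_Name Time_RCD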
    # Initialize list to store all results
    all_results = []
    all_logits = []
    if Multi:
        filter_list = [
            "GHL",
            "Daphnet",
            "Exathlon",
            "Genesis",
            "OPP",
            "SMD",
            # "SWaT",
            # "PSM",
            "SMAP",
            "MSL",
            "CreditCard",
            "GECCO",
            "MITDB",
            "SVDB",
            "LTDB",
            "CATSv2",
            "TAO",
        ]
        base_dir = 'datasets/TSB-AD-M/'
    else:
        filter_list = [
            "Daphnet",
            "CATSv2",
            "SWaT",
            "LTDB",
            "TAO",
            "Exathlon",
            "MITDB",
            "MSL",
            "SMAP",
            "SMD",
            "SVDB",
            "OPP",
            # "IOPS",
            # "MGAB",
            # "NAB",
            # "NEK",
            # "Power",
            # "SED",
            # "Stock",
            # "TODS",
            # "WSD",
            # "YAHOO",
            # "UCR"
        ]
        base_dir = 'datasets/TSB-AD-U/'
    files = os.listdir(base_dir)



    # Iterate over every dataset file, skipping those matched by the filter list
    for file in files:

        if any(filter_item in file for filter_item in filter_list):
            print(f"Skipping file: {file} due to filter criteria.")
            continue

        # Clear GPU memory before processing each file
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        # Set the file-specific values on the already-parsed args
        args.filename = file
        args.data_direc = base_dir
        
        if Multi:
            Optimal_Det_HP = Optimal_Multi_algo_HP_dict[args.AD_Name]
        else:
            Optimal_Det_HP = Optimal_Uni_algo_HP_dict[args.AD_Name]

        # Read data using a proper path join
        df_path = os.path.join(args.data_direc, args.filename)
        df = pd.read_csv(df_path).dropna()
        data = df.iloc[:, 0:-1].values.astype(float)
        label = df['Label'].astype(int).to_numpy()

        slidingWindow = find_length_rank(data, rank=1)
        # TSB-AD filenames encode the train/test split point as the third token
        # from the end of the stem (the value following 'tr'), so recover it here
        train_index = int(args.filename.split('.')[0].split('_')[-3])
        data_train = data[:train_index, :]
        test_data = data[train_index:, :]
        label_test = label[train_index:]



        logits = None  # ensure defined irrespective of branch

        print(f"Running {args.AD_Name} on {args.filename}...")
        if args.AD_Name in Semisupervise_AD_Pool:
            output = run_Semisupervise_AD(args.AD_Name, data_train, test_data, **Optimal_Det_HP)
        elif args.AD_Name in Unsupervise_AD_Pool:
            if args.AD_Name == 'Time_RCD':
                # For Time_RCD, we need to pass the test data directly
                output, logits = run_Unsupervise_AD(args.AD_Name, data_train, test_data, Multi=Multi, **Optimal_Det_HP)
            else:
                output = run_Unsupervise_AD(args.AD_Name, data_train, test_data, **Optimal_Det_HP)
        else:
            raise ValueError(f"{args.AD_Name} is not defined")

        if isinstance(output, np.ndarray):
            # Align lengths: some detectors return slightly fewer scores than labels
            min_length = min(len(output), len(label_test))
            output_aligned = output[:min_length]
            label_aligned = label_test[:min_length]
            logits_aligned = None
            if logits is not None:
                logits_aligned = logits[:min_length]

            # Binarize scores with a 3-sigma rule: points scoring above
            # mean + 3*std of the score distribution are flagged as anomalies
            threshold = np.mean(output_aligned) + 3 * np.std(output_aligned)
            evaluation_result = get_metrics(output_aligned, label_aligned,
                                            slidingWindow=slidingWindow,
                                            pred=output_aligned > threshold)
            evaluation_result_logits = None
            if logits is not None:
                logits_threshold = np.mean(logits_aligned) + 3 * np.std(logits_aligned)
                evaluation_result_logits = get_metrics(logits_aligned, label_aligned,
                                                       slidingWindow=slidingWindow,
                                                       pred=logits_aligned > logits_threshold)

            print(evaluation_result)

            # Prepare result dictionary with filename and all metrics
            result_dict = {
                'filename': args.filename,
                'AD_Name': args.AD_Name,
                'sliding_window': slidingWindow,
                'train_index': train_index,
                'data_shape': f"{data.shape[0]}x{data.shape[1]}",
                'output_length': len(output),
                'label_length': len(label_test),  # Use label_test length
                'aligned_length': min_length,
                **evaluation_result  # Unpack all evaluation metrics
            }
            all_results.append(result_dict)

            if logits is not None:
                logit_dict = {
                    'filename': args.filename,
                    'AD_Name': args.AD_Name,
                    'sliding_window': slidingWindow,
                    'train_index': train_index,
                    'data_shape': f"{data.shape[0]}x{data.shape[1]}",
                    'output_length': len(logits),
                    'label_length': len(label_test),  # Use label_test length
                    'aligned_length': min_length,
                    **evaluation_result_logits  # Unpack all evaluation metrics for logits
                }
                all_logits.append(logit_dict)
            # Save value, label, and anomaly scores to a pickle file
            if args.save:
                output_filename = f'{args.filename.split(".")[0]}_results.pkl'
                output_dir = os.path.join(os.getcwd(), f"{'Multi' if Multi else 'Uni'}_{args.AD_Name}")
                # exist_ok makes a separate pre-existence check unnecessary
                os.makedirs(output_dir, exist_ok=True)
                output_path = os.path.join(output_dir, output_filename)
                pd.DataFrame({
                    'value': test_data[:min_length].tolist(),
                    'label': label_aligned.tolist(),
                    'anomaly_score': output_aligned.tolist(),
                    'logits': logits_aligned.tolist() if logits is not None else None
                }).to_pickle(output_path)
                print(f'Results saved to {output_path}')
        else:
            print(f'At {args.filename}: {output}')
            # Save error information as well
            result_dict = {
                'filename': args.filename,
                'AD_Name': args.AD_Name,
                'sliding_window': None,
                'train_index': None,
                'data_shape': None,
                'error_message': output
            }
            all_results.append(result_dict)

    # Convert results to a DataFrame and save to CSV
    if all_results:
        results_df = pd.DataFrame(all_results)
        output_filename = f'{"Multi" if Multi else "Uni"}_{args.AD_Name}.csv'
        results_df.to_csv(output_filename, index=False)
        print(f"\nAll results saved to {output_filename}")
        print(f"Total files processed: {len(all_results)}")
        print(f"Results shape: {results_df.shape}")
        if all_logits:
            logits_df = pd.DataFrame(all_logits)
            # Use a distinct name so the logits CSV does not overwrite the results CSV
            logits_output_filename = f'{"Multi" if Multi else "Uni"}_{args.AD_Name}_logits.csv'
            logits_df.to_csv(logits_output_filename, index=False)
            print(f"Logits results saved to {logits_output_filename}")
    else:
        print("No results to save.")