Spaces:
Running
Running
File size: 21,482 Bytes
d03866e b5b9227 d03866e b5b9227 d03866e b5b9227 d03866e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 |
import numpy as np
import math
from utils.slidingWindows import find_length_rank
Unsupervise_AD_Pool = ['FFT', 'SR', 'NORMA', 'Series2Graph', 'Sub_IForest', 'IForest', 'LOF', 'Sub_LOF', 'POLY', 'MatrixProfile', 'Sub_PCA', 'PCA', 'HBOS',
'Sub_HBOS', 'KNN', 'Sub_KNN','KMeansAD', 'KMeansAD_U', 'KShapeAD', 'COPOD', 'CBLOF', 'COF', 'EIF', 'RobustPCA', 'Lag_Llama',
'TimesFM', 'Chronos', 'MOMENT_ZS', 'DADA', 'Time_MOE', 'Time_RCD', 'TSPulse']
Semisupervise_AD_Pool = ['Left_STAMPi', 'SAND', 'MCD', 'Sub_MCD', 'OCSVM', 'Sub_OCSVM', 'AutoEncoder', 'CNN', 'LSTMAD', 'TranAD', 'USAD', 'OmniAnomaly',
'AnomalyTransformer', 'TimesNet', 'FITS', 'Donut', 'OFA', 'MOMENT_FT', 'M2N2', ]
def run_Unsupervise_AD(model_name, training_data, testing_data, **kwargs):
# Extract data_index if present, but don't pass it to all functions
data_index = kwargs.pop('data_index', None)
function_name = f'run_{model_name}'
function_to_call = globals()[function_name]
# Only pass data_index to functions that need it
if 'Reconstruction' in model_name:
results = function_to_call(data, data_index, **kwargs)
else:
results = function_to_call(testing_data, **kwargs)
return results
def run_Semisupervise_AD(model_name, data_train, data_test, **kwargs):
try:
function_name = f'run_{model_name}'
function_to_call = globals()[function_name]
results = function_to_call(data_train, data_test, **kwargs)
return results
except KeyError:
error_message = f"Model function '{function_name}' is not defined."
print(error_message)
return error_message
except Exception as e:
error_message = f"An error occurred while running the model '{function_name}': {str(e)}"
print(error_message)
return error_message
def run_FFT(data, ifft_parameters=5, local_neighbor_window=21, local_outlier_threshold=0.6, max_region_size=50, max_sign_change_distance=10):
from models.FFT import FFT
clf = FFT(ifft_parameters=ifft_parameters, local_neighbor_window=local_neighbor_window, local_outlier_threshold=local_outlier_threshold, max_region_size=max_region_size, max_sign_change_distance=max_sign_change_distance)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_Sub_IForest(data, periodicity=1, n_estimators=100, max_features=1, n_jobs=1):
from models.IForest import IForest
slidingWindow = find_length_rank(data, rank=periodicity)
clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_IForest(train_data, test_data, slidingWindow=100, n_estimators=100, max_features=1, n_jobs=1):
from models.IForest import IForest
clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs)
clf.fit(train_data)
score = clf.decision_function(test_data)
# score = clf.decision_scores_
return score.ravel()
def run_Sub_LOF(data, periodicity=1, n_neighbors=30, metric='minkowski', n_jobs=1):
from models.LOF import LOF
slidingWindow = find_length_rank(data, rank=periodicity)
clf = LOF(slidingWindow=slidingWindow, n_neighbors=n_neighbors, metric=metric, n_jobs=n_jobs)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_LOF(train_data, test_data, slidingWindow=1, n_neighbors=30, metric='minkowski', n_jobs=1):
from models.LOF import LOF
clf = LOF(slidingWindow=slidingWindow, n_neighbors=n_neighbors, metric=metric, n_jobs=n_jobs)
clf.fit(train_data)
score = clf.decision_function(test_data)
return score.ravel()
def run_POLY(data, periodicity=1, power=3, n_jobs=1):
from models.POLY import POLY
slidingWindow = find_length_rank(data, rank=periodicity)
clf = POLY(power=power, window = slidingWindow)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_MatrixProfile(data, periodicity=1, n_jobs=1):
from models.MatrixProfile import MatrixProfile
slidingWindow = find_length_rank(data, rank=periodicity)
clf = MatrixProfile(window=slidingWindow)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_Left_STAMPi(data_train, data):
from models.Left_STAMPi import Left_STAMPi
clf = Left_STAMPi(n_init_train=len(data_train), window_size=100)
clf.fit(data)
score = clf.decision_function(data)
return score.ravel()
def run_SAND(data_train, data_test, periodicity=1):
from models.SAND import SAND
slidingWindow = find_length_rank(data_test, rank=periodicity)
clf = SAND(pattern_length=slidingWindow, subsequence_length=4*(slidingWindow))
clf.fit(data_test.squeeze(), online=True, overlaping_rate=int(1.5*slidingWindow), init_length=len(data_train), alpha=0.5, batch_size=max(5*(slidingWindow), int(0.1*len(data_test))))
score = clf.decision_scores_
return score.ravel()
def run_KShapeAD(data, periodicity=1):
from models.SAND import SAND
slidingWindow = find_length_rank(data, rank=periodicity)
clf = SAND(pattern_length=slidingWindow, subsequence_length=4*(slidingWindow))
clf.fit(data.squeeze(), overlaping_rate=int(1.5*slidingWindow))
score = clf.decision_scores_
return score.ravel()
def run_Series2Graph(data, periodicity=1):
from models.Series2Graph import Series2Graph
slidingWindow = find_length_rank(data, rank=periodicity)
data = data.squeeze()
s2g = Series2Graph(pattern_length=slidingWindow)
s2g.fit(data)
query_length = 2*slidingWindow
s2g.score(query_length=query_length,dataset=data)
score = s2g.decision_scores_
score = np.array([score[0]]*math.ceil(query_length//2) + list(score) + [score[-1]]*(query_length//2))
return score.ravel()
def run_Sub_PCA(train_data, test_data, periodicity=1, n_components=None, n_jobs=1):
from models.PCA import PCA
slidingWindow = find_length_rank(train_data, rank=periodicity)
clf = PCA(slidingWindow = slidingWindow, n_components=n_components)
clf.fit(train_data)
score = clf.decision_function(test_data)
return score.ravel()
def run_PCA(train_data, test_data, slidingWindow=100, n_components=None, n_jobs=1):
from models.PCA import PCA
clf = PCA(slidingWindow = slidingWindow, n_components=n_components)
clf.fit(train_data)
score = clf.decision_function(test_data)
return score.ravel()
def run_NORMA(data, periodicity=1, clustering='hierarchical', n_jobs=1):
from models.NormA import NORMA
slidingWindow = find_length_rank(data, rank=periodicity)
clf = NORMA(pattern_length=slidingWindow, nm_size=3*slidingWindow, clustering=clustering)
clf.fit(data)
score = clf.decision_scores_
score = np.array([score[0]]*math.ceil((slidingWindow-1)/2) + list(score) + [score[-1]]*((slidingWindow-1)//2))
if len(score) > len(data):
start = len(score) - len(data)
score = score[start:]
return score.ravel()
def run_Sub_HBOS(data, periodicity=1, n_bins=10, tol=0.5, n_jobs=1):
from models.HBOS import HBOS
slidingWindow = find_length_rank(data, rank=periodicity)
clf = HBOS(slidingWindow=slidingWindow, n_bins=n_bins, tol=tol)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_HBOS(data, slidingWindow=1, n_bins=10, tol=0.5, n_jobs=1):
from models.HBOS import HBOS
clf = HBOS(slidingWindow=slidingWindow, n_bins=n_bins, tol=tol)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_Sub_OCSVM(data_train, data_test, kernel='rbf', nu=0.5, periodicity=1, n_jobs=1):
from models.OCSVM import OCSVM
slidingWindow = find_length_rank(data_test, rank=periodicity)
clf = OCSVM(slidingWindow=slidingWindow, kernel=kernel, nu=nu)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_OCSVM(data_train, data_test, kernel='rbf', nu=0.5, slidingWindow=1, n_jobs=1):
from models.OCSVM import OCSVM
clf = OCSVM(slidingWindow=slidingWindow, kernel=kernel, nu=nu)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_Sub_MCD(data_train, data_test, support_fraction=None, periodicity=1, n_jobs=1):
from models.MCD import MCD
slidingWindow = find_length_rank(data_test, rank=periodicity)
clf = MCD(slidingWindow=slidingWindow, support_fraction=support_fraction)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_MCD(data_train, data_test, support_fraction=None, slidingWindow=1, n_jobs=1):
from models.MCD import MCD
clf = MCD(slidingWindow=slidingWindow, support_fraction=support_fraction)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_Sub_KNN(data, n_neighbors=10, method='largest', periodicity=1, n_jobs=1):
from models.KNN import KNN
slidingWindow = find_length_rank(data, rank=periodicity)
clf = KNN(slidingWindow=slidingWindow, n_neighbors=n_neighbors,method=method, n_jobs=n_jobs)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_KNN(data, slidingWindow=1, n_neighbors=10, method='largest', n_jobs=1):
from models.KNN import KNN
clf = KNN(slidingWindow=slidingWindow, n_neighbors=n_neighbors, method=method, n_jobs=n_jobs)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_KMeansAD(data, n_clusters=20, window_size=20, n_jobs=1):
from models.KMeansAD import KMeansAD
clf = KMeansAD(k=n_clusters, window_size=window_size, stride=1, n_jobs=n_jobs)
score = clf.fit_predict(data)
return score.ravel()
def run_KMeansAD_U(data, n_clusters=20, periodicity=1,n_jobs=1):
from models.KMeansAD import KMeansAD
slidingWindow = find_length_rank(data, rank=periodicity)
clf = KMeansAD(k=n_clusters, window_size=slidingWindow, stride=1, n_jobs=n_jobs)
score = clf.fit_predict(data)
return score.ravel()
def run_COPOD(data, n_jobs=1):
from models.COPOD import COPOD
clf = COPOD(n_jobs=n_jobs)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_CBLOF(data, n_clusters=8, alpha=0.9, n_jobs=1):
from models.CBLOF import CBLOF
clf = CBLOF(n_clusters=n_clusters, alpha=alpha, n_jobs=n_jobs)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_COF(data, n_neighbors=30):
from models.COF import COF
clf = COF(n_neighbors=n_neighbors)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_EIF(data, n_trees=100):
from models.EIF import EIF
clf = EIF(n_trees=n_trees)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_RobustPCA(data, max_iter=1000):
from models.RobustPCA import RobustPCA
clf = RobustPCA(max_iter=max_iter)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_SR(data, periodicity=1):
from models.SR import SR
slidingWindow = find_length_rank(data, rank=periodicity)
return SR(data, window_size=slidingWindow)
def run_AutoEncoder(data_train, data_test, window_size=100, hidden_neurons=[64, 32], n_jobs=1):
from models.AE import AutoEncoder
clf = AutoEncoder(slidingWindow=window_size, hidden_neurons=hidden_neurons, batch_size=128, epochs=50)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_CNN(data_train, data_test, window_size=100, num_channel=[32, 32, 40], lr=0.0008, n_jobs=1):
from models.CNN import CNN
clf = CNN(window_size=window_size, num_channel=num_channel, feats=data_test.shape[1], lr=lr, batch_size=128)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_LSTMAD(data_train, data_test, window_size=100, lr=0.0008):
from models.LSTMAD import LSTMAD
clf = LSTMAD(window_size=window_size, pred_len=1, lr=lr, feats=data_test.shape[1], batch_size=128)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_TranAD(data_train, data_test, win_size=10, lr=1e-3):
from models.TranAD import TranAD
clf = TranAD(win_size=win_size, feats=data_test.shape[1], lr=lr)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_AnomalyTransformer(data_train, data_test, win_size=100, lr=1e-4, batch_size=128):
from models.AnomalyTransformer import AnomalyTransformer
clf = AnomalyTransformer(win_size=win_size, input_c=data_test.shape[1], lr=lr, batch_size=batch_size)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_OmniAnomaly(data_train, data_test, win_size=100, lr=0.002):
from models.OmniAnomaly import OmniAnomaly
clf = OmniAnomaly(win_size=win_size, feats=data_test.shape[1], lr=lr)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_USAD(data_train, data_test, win_size=5, lr=1e-4):
from models.USAD import USAD
clf = USAD(win_size=win_size, feats=data_test.shape[1], lr=lr)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_Donut(data_train, data_test, win_size=120, lr=1e-4, batch_size=128):
from models.Donut import Donut
clf = Donut(win_size=win_size, input_c=data_test.shape[1], lr=lr, batch_size=batch_size)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_TimesNet(data_train, data_test, win_size=96, lr=1e-4):
from models.TimesNet import TimesNet
clf = TimesNet(win_size=win_size, enc_in=data_test.shape[1], lr=lr, epochs=50)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_FITS(data_train, data_test, win_size=100, lr=1e-3):
from models.FITS import FITS
clf = FITS(win_size=win_size, input_c=data_test.shape[1], lr=lr, batch_size=128)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_OFA(data_train, data_test, win_size=100, batch_size = 64):
from models.OFA import OFA
clf = OFA(win_size=win_size, enc_in=data_test.shape[1], epochs=10, batch_size=batch_size)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_Lag_Llama(data, win_size=96, batch_size=64):
from models.Lag_Llama import Lag_Llama
clf = Lag_Llama(win_size=win_size, input_c=data.shape[1], batch_size=batch_size)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_Chronos(data, win_size=50, batch_size=64):
from models.Chronos import Chronos
clf = Chronos(win_size=win_size, prediction_length=1, input_c=1, model_size='base', batch_size=batch_size)
data =data.reshape(-1,1)
clf.fit(data)
score = clf.decision_scores_
return score.ravel()
def run_TimesFM(data, win_size=96):
from models.TimesFM import TimesFM
clf = TimesFM(win_size=win_size)
data_normalized = (data - np.mean(data, axis=0)) / np.std(data, axis=0)
data_normalized = data_normalized.reshape(-1,1)
clf.fit(data_normalized)
#normalizd data:
score = clf.decision_scores_
return score.ravel()
def run_MOMENT_ZS(data, win_size=256):
from models.MOMENT import MOMENT
clf = MOMENT(win_size=win_size, input_c=1)
data = data.reshape(-1,1)
# Zero shot
clf.zero_shot(data)
score = clf.decision_scores_
return score.ravel()
def run_MOMENT_FT(data_train, data_test, win_size=256):
from models.MOMENT import MOMENT
clf = MOMENT(win_size=win_size, input_c=data_test.shape[1])
# Finetune
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_M2N2(
data_train, data_test, win_size=12, stride=12,
batch_size=64, epochs=100, latent_dim=16,
lr=1e-3, ttlr=1e-3, normalization='Detrend',
gamma=0.99, th=0.9, valid_size=0.2, infer_mode='online'
):
from models.M2N2 import M2N2
clf = M2N2(
win_size=win_size, stride=stride,
num_channels=data_test.shape[1],
batch_size=batch_size, epochs=epochs,
latent_dim=latent_dim,
lr=lr, ttlr=ttlr,
normalization=normalization,
gamma=gamma, th=th, valid_size=valid_size,
infer_mode=infer_mode
)
clf.fit(data_train)
score = clf.decision_function(data_test)
return score.ravel()
def run_DADA(data_test, device=0, win_size=100, batch_size=32):
from models.DADA import DADA
clf = DADA(device=device, win_size=win_size, batch_size=batch_size)
score = clf.zero_shot(data_test)
return score.ravel()
def run_Time_MOE(data, device=0, win_size=64, batch_size=32):
from models.time_moe import Time_MOE
clf = Time_MOE(device=device, win_size=win_size, batch_size=batch_size)
score = clf.zero_shot(data)
return score.ravel()
def run_Time_RCD(data, **kwargs):
Multi = kwargs.get('Multi', False)
win_size = kwargs.get('win_size', 5000)
batch_size = kwargs.get('batch_size', 64)
random_mask = kwargs.get('random_mask', 'random_mask')
size = kwargs.get('size', 'full')
device = kwargs.get('device', '2') # Extract device parameter
"""
Wrapper function for Time_RCD model
"""
from models.TimeRCD import TimeRCDPretrainTester
from models.time_rcd.time_rcd_config import TimeRCDConfig, default_config
config = default_config
if Multi:
if size == 'small':
if random_mask == 'random_mask':
checkpoint_path = 'checkpoints/dataset_10_20.pth'
else:
checkpoint_path = 'checkpoints/full_mask_10_20.pth'
config.ts_config.patch_size = 16
else:
if random_mask == 'random_mask':
checkpoint_path = 'checkpoints/dataset_15_56.pth'
else:
checkpoint_path = 'checkpoints/full_mask_15_56.pth'
config.ts_config.patch_size = 32
config.ts_config.num_features = data.shape[1]
else:
checkpoint_path = 'checkpoints/full_mask_anomaly_head_pretrain_checkpoint_best.pth'
config.ts_config.patch_size = 16
config.ts_config.num_features = 1
config.cuda_devices = device # Use the device parameter properly
print("Using CUDA device:", config.cuda_devices)
config.win_size = win_size
config.batch_size = batch_size
# config.ts_config.num_features = data.shape[1]
print("Here is the data shape: ", data.shape)
print(f"Checkpoint path: {checkpoint_path}")
cls = TimeRCDPretrainTester(checkpoint_path, config)
score_list, logit_list = cls.zero_shot(data)
# Concatenate across batches robustly to avoid inhomogeneous shape errors
score = np.concatenate([np.asarray(s).reshape(-1) for s in score_list], axis=0)
logit = np.concatenate([np.asarray(l).reshape(-1) for l in logit_list], axis=0)
return score, logit
def run_TSPulse(data, win_size=256, batch_size=64, prediction_mode=None, aggregation_length=64,
aggr_function="max", smoothing_length=8, least_significant_scale=0.01,
least_significant_score=0.1, device=None):
"""
Wrapper function for TSPulse anomaly detection model
Parameters
----------
data : numpy.ndarray
Time series data of shape (n_samples, n_features)
win_size : int, default=256
Window size (for compatibility, not directly used by TSPulse)
batch_size : int, default=64
Batch size for processing
prediction_mode : list, optional
List of prediction modes. If None, uses default time and frequency reconstruction
aggregation_length : int, default=64
Length for aggregation of scores
aggr_function : str, default="max"
Aggregation function ("max", "mean", "median")
smoothing_length : int, default=8
Length for smoothing the anomaly scores
least_significant_scale : float, default=0.01
Minimum scale for significance
least_significant_score : float, default=0.1
Minimum score for significance
device : str, optional
Device to use ("cuda" or "cpu"). Auto-detected if None.
Returns
-------
numpy.ndarray
Anomaly scores of shape (n_samples,)
"""
from models.TSPulse import run_TSPulse as tspulse_runner
# Prepare kwargs for TSPulse
kwargs = {
'batch_size': batch_size,
'aggregation_length': aggregation_length,
'aggr_function': aggr_function,
'smoothing_length': smoothing_length,
'least_significant_scale': least_significant_scale,
'least_significant_score': least_significant_score,
}
if prediction_mode is not None:
kwargs['prediction_mode'] = prediction_mode
if device is not None:
kwargs['device'] = device
try:
# Run TSPulse anomaly detection
score = tspulse_runner(data, **kwargs)
return score.ravel()
except Exception as e:
print(f"Warning: TSPulse failed with error: {str(e)}")
print("Falling back to random scores")
# Return random scores as fallback
return np.random.random(len(data)) * 0.1 |