File size: 4,735 Bytes
c4b87d2 0a58567 c4b87d2 0a58567 c4b87d2 0a58567 c4b87d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import functools
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
RBF,
ConstantKernel,
DotProduct,
ExpSineSquared,
Kernel,
RationalQuadratic,
WhiteKernel,
)
from src.synthetic_generation.abstract_classes import AbstractTimeSeriesGenerator
class KernelSynthGenerator(AbstractTimeSeriesGenerator):
    """
    Generate independent synthetic univariate time series using kernel synthesis.

    Each series is a single draw from a Gaussian process prior whose covariance
    is a random composite kernel: between 1 and ``max_kernels`` base kernels are
    drawn (with replacement) from ``kernel_bank`` and folded together with
    randomly chosen ``+`` / ``*`` operations.

    All randomness (kernel selection and the GP draw itself) is derived from
    ``self.rng``, so a generator built with a fixed ``random_seed`` produces a
    reproducible sequence of series.
    """

    # Upper bound on re-draws when the GP sampling step raises LinAlgError.
    _MAX_SAMPLING_ATTEMPTS = 100

    def __init__(
        self,
        length: int = 1024,
        max_kernels: int = 5,
        random_seed: int | None = None,
    ):
        """
        Parameters
        ----------
        length : int, optional
            Number of time steps per series (default: 1024). Must be >= 1.
        max_kernels : int, optional
            Maximum number of base kernels to combine (default: 5). Must be >= 1.
        random_seed : int, optional
            Seed for the random number generator.

        Raises
        ------
        ValueError
            If ``length`` or ``max_kernels`` is smaller than 1.
        """
        # Fail early with a clear message instead of a ZeroDivisionError while
        # building the kernel bank or an opaque error from rng.integers later.
        if length < 1:
            raise ValueError(f"length must be a positive integer, got {length}")
        if max_kernels < 1:
            raise ValueError(
                f"max_kernels must be a positive integer, got {max_kernels}"
            )

        self.length = length
        self.max_kernels = max_kernels
        self.rng = np.random.default_rng(random_seed)

        # The sampling grid spans [0, 1] with `length` points, so a periodicity
        # of k / length corresponds to a period of roughly k time steps.
        # NOTE(review): the groups below presumably target seasonalities of
        # sub-hourly/hourly, daily, weekly, monthly, quarterly and yearly data;
        # confirm against the reference this bank was taken from.
        # The order and the duplicated entries are kept on purpose: kernels are
        # drawn uniformly by position, so changing either changes the output.
        self.kernel_bank = [
            ExpSineSquared(periodicity=24 / length),  # 24 steps
            ExpSineSquared(periodicity=48 / length),  # 48 steps
            ExpSineSquared(periodicity=96 / length),  # 96 steps
            ExpSineSquared(periodicity=24 * 7 / length),  # 168 steps
            ExpSineSquared(periodicity=48 * 7 / length),  # 336 steps
            ExpSineSquared(periodicity=96 * 7 / length),  # 672 steps
            ExpSineSquared(periodicity=7 / length),  # 7 steps
            ExpSineSquared(periodicity=14 / length),  # 14 steps
            ExpSineSquared(periodicity=30 / length),  # 30 steps
            ExpSineSquared(periodicity=60 / length),  # 60 steps
            ExpSineSquared(periodicity=365 / length),  # 365 steps
            ExpSineSquared(periodicity=365 * 2 / length),  # 730 steps
            ExpSineSquared(periodicity=4 / length),  # 4 steps
            ExpSineSquared(periodicity=26 / length),  # 26 steps
            ExpSineSquared(periodicity=52 / length),  # 52 steps
            ExpSineSquared(periodicity=4 / length),  # 4 steps
            ExpSineSquared(periodicity=6 / length),  # 6 steps
            ExpSineSquared(periodicity=12 / length),  # 12 steps
            ExpSineSquared(periodicity=4 / length),  # 4 steps
            ExpSineSquared(periodicity=4 * 10 / length),  # 40 steps
            ExpSineSquared(periodicity=10 / length),  # 10 steps
            # Linear trend kernels.
            DotProduct(sigma_0=0.0),
            DotProduct(sigma_0=1.0),
            DotProduct(sigma_0=10.0),
            # Smooth local-variation kernels.
            RBF(length_scale=0.1),
            RBF(length_scale=1.0),
            RBF(length_scale=10.0),
            RationalQuadratic(alpha=0.1),
            RationalQuadratic(alpha=1.0),
            RationalQuadratic(alpha=10.0),
            # Noise and constant-offset kernels.
            WhiteKernel(noise_level=0.1),
            WhiteKernel(noise_level=1.0),
            ConstantKernel(),
        ]

    def _random_binary_map(self, a: Kernel, b: Kernel) -> Kernel:
        """
        Randomly combine two kernels with + or *.

        The operation is picked uniformly using ``self.rng``.
        """
        ops = [lambda x, y: x + y, lambda x, y: x * y]
        return self.rng.choice(ops)(a, b)

    def _sample_from_gp_prior(
        self,
        kernel: Kernel,
        X: np.ndarray,
        random_seed: int | None = None,
    ) -> np.ndarray:
        """
        Draw a sample from GP prior using GaussianProcessRegressor.

        Parameters
        ----------
        kernel : Kernel
            Covariance function of the GP prior.
        X : np.ndarray
            Input locations, either 1-D ``[n]`` or 2-D ``[n, n_features]``.
        random_seed : int, optional
            Seed for the draw. When omitted, a seed is taken from ``self.rng``.

        Returns
        -------
        np.ndarray
            Sampled values, shape ``[n]``.
        """
        if X.ndim == 1:
            X = X[:, None]
        if random_seed is None:
            # random_state=None would make scikit-learn use NumPy's global
            # RandomState, ignoring the seed this generator was built with.
            random_seed = int(self.rng.integers(0, 2**32))
        gpr = GaussianProcessRegressor(kernel=kernel)
        ts = gpr.sample_y(X, n_samples=1, random_state=random_seed)
        # reshape (not squeeze) so a single-point series is still 1-D.
        return np.asarray(ts).reshape(-1)

    def generate_time_series(self, random_seed: int | None = None) -> np.ndarray:
        """
        Generate a single independent univariate time series.

        Parameters
        ----------
        random_seed : int, optional
            Random seed for reproducible generation. When given, ``self.rng``
            is re-seeded with it before the kernels are selected.

        Returns
        -------
        np.ndarray
            Shape: [seq_len]

        Raises
        ------
        numpy.linalg.LinAlgError
            If sampling keeps failing after ``_MAX_SAMPLING_ATTEMPTS`` re-draws.
        """
        X = np.linspace(0, 1, self.length)
        seed = random_seed
        last_error = None

        # Retry with a fresh composite kernel when sampling fails numerically.
        # A bounded loop replaces the former unbounded recursion.
        for _ in range(self._MAX_SAMPLING_ATTEMPTS):
            if seed is not None:
                self.rng = np.random.default_rng(seed)

            num_kernels = self.rng.integers(1, self.max_kernels + 1)
            selected = self.rng.choice(self.kernel_bank, num_kernels, replace=True)
            composite = functools.reduce(self._random_binary_map, selected)

            try:
                return self._sample_from_gp_prior(composite, X, random_seed=seed)
            except np.linalg.LinAlgError as err:
                last_error = err
                # With an explicit seed, move to the next seed so the retry
                # selects different kernels; otherwise self.rng has already
                # advanced and the next draw differs on its own.
                if seed is not None:
                    seed += 1

        raise np.linalg.LinAlgError(
            f"GP prior sampling failed after {self._MAX_SAMPLING_ATTEMPTS} attempts"
        ) from last_error
|