File size: 4,735 Bytes
c4b87d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a58567
c4b87d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a58567
c4b87d2
 
 
 
 
 
 
 
 
 
 
0a58567
c4b87d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import functools

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
    RBF,
    ConstantKernel,
    DotProduct,
    ExpSineSquared,
    Kernel,
    RationalQuadratic,
    WhiteKernel,
)
from src.synthetic_generation.abstract_classes import AbstractTimeSeriesGenerator


class KernelSynthGenerator(AbstractTimeSeriesGenerator):
    """
    Generate independent synthetic univariate time series using kernel synthesis.

    Each series is a single draw from a zero-mean Gaussian process prior whose
    covariance is a random composite kernel: between 1 and ``max_kernels`` base
    kernels are picked (with replacement) from ``kernel_bank`` and folded
    together left-to-right with randomly chosen ``+`` / ``*`` operators.
    """

    # Upper bound on how many kernel/sample attempts are made when the GP draw
    # fails numerically, so a pathological configuration cannot retry forever.
    _MAX_ATTEMPTS = 100

    def __init__(
        self,
        length: int = 1024,
        max_kernels: int = 5,
        random_seed: int | None = None,
    ):
        """
        Parameters
        ----------
        length : int, optional
            Number of time steps per series (default: 1024). Must be >= 1.
        max_kernels : int, optional
            Maximum number of base kernels to combine (default: 5). Must be >= 1.
        random_seed : int, optional
            Seed for the random number generator.

        Raises
        ------
        ValueError
            If ``length`` or ``max_kernels`` is smaller than 1.
        """
        # Fail early with a clear message instead of a ZeroDivisionError below
        # or a "low >= high" error at generation time.
        if length < 1:
            raise ValueError(f"length must be >= 1, got {length}")
        if max_kernels < 1:
            raise ValueError(f"max_kernels must be >= 1, got {max_kernels}")

        self.length = length
        self.max_kernels = max_kernels
        self.rng = np.random.default_rng(random_seed)
        # Inputs are placed on [0, 1] with `length` points, so a periodicity of
        # p / length corresponds to roughly p time steps. The grouping by data
        # frequency below is presumably the intent of each entry -- confirm
        # before relying on it. Duplicate entries are kept on purpose: removing
        # them would change the sampling probabilities of the bank.
        self.kernel_bank = [
            # Presumably hourly-style data.
            ExpSineSquared(periodicity=24 / length),  # ~24 steps
            ExpSineSquared(periodicity=48 / length),  # ~48 steps
            ExpSineSquared(periodicity=96 / length),  # ~96 steps
            ExpSineSquared(periodicity=24 * 7 / length),  # ~168 steps
            ExpSineSquared(periodicity=48 * 7 / length),  # ~336 steps
            ExpSineSquared(periodicity=96 * 7 / length),  # ~672 steps
            # Presumably daily data.
            ExpSineSquared(periodicity=7 / length),  # ~7 steps
            ExpSineSquared(periodicity=14 / length),  # ~14 steps
            ExpSineSquared(periodicity=30 / length),  # ~30 steps
            ExpSineSquared(periodicity=60 / length),  # ~60 steps
            ExpSineSquared(periodicity=365 / length),  # ~365 steps
            ExpSineSquared(periodicity=365 * 2 / length),  # ~730 steps
            # Presumably weekly data.
            ExpSineSquared(periodicity=4 / length),  # ~4 steps
            ExpSineSquared(periodicity=26 / length),  # ~26 steps
            ExpSineSquared(periodicity=52 / length),  # ~52 steps
            # Presumably monthly data.
            ExpSineSquared(periodicity=4 / length),  # ~4 steps
            ExpSineSquared(periodicity=6 / length),  # ~6 steps
            ExpSineSquared(periodicity=12 / length),  # ~12 steps
            # Presumably quarterly data.
            ExpSineSquared(periodicity=4 / length),  # ~4 steps
            ExpSineSquared(periodicity=4 * 10 / length),  # ~40 steps
            # Presumably yearly data.
            ExpSineSquared(periodicity=10 / length),  # ~10 steps
            # Linear trends.
            DotProduct(sigma_0=0.0),
            DotProduct(sigma_0=1.0),
            DotProduct(sigma_0=10.0),
            # Smooth local variation at several length scales.
            RBF(length_scale=0.1),
            RBF(length_scale=1.0),
            RBF(length_scale=10.0),
            RationalQuadratic(alpha=0.1),
            RationalQuadratic(alpha=1.0),
            RationalQuadratic(alpha=10.0),
            # Noise and constant offset.
            WhiteKernel(noise_level=0.1),
            WhiteKernel(noise_level=1.0),
            ConstantKernel(),
        ]

    def _random_binary_map(self, a: Kernel, b: Kernel) -> Kernel:
        """
        Randomly combine two kernels with + or *.

        The operator is drawn from ``self.rng``, so the result is reproducible
        for a given generator state.
        """
        ops = [lambda x, y: x + y, lambda x, y: x * y]
        return self.rng.choice(ops)(a, b)

    def _sample_from_gp_prior(
        self,
        kernel: Kernel,
        X: np.ndarray,
        random_seed: int | None = None,
    ) -> np.ndarray:
        """
        Draw a sample from GP prior using GaussianProcessRegressor.

        Parameters
        ----------
        kernel : Kernel
            Covariance function of the (unfitted) Gaussian process.
        X : np.ndarray
            Input locations; a 1-D array is treated as one feature per point.
        random_seed : int, optional
            Seed for the draw. When omitted, a seed is derived from
            ``self.rng`` so that the draw follows the generator's own stream.

        Returns
        -------
        np.ndarray
            1-D array with one value per row of ``X``.
        """
        if X.ndim == 1:
            X = X[:, None]
        if random_seed is None:
            # Passing None to sample_y would make scikit-learn use NumPy's
            # global RandomState, silently ignoring the seed this generator
            # was constructed with. Seeds must fit in [0, 2**32 - 1].
            random_seed = int(self.rng.integers(0, 2**32))
        gpr = GaussianProcessRegressor(kernel=kernel)
        ts = gpr.sample_y(X, n_samples=1, random_state=random_seed)

        # reshape(-1) rather than squeeze(): squeeze would collapse a
        # single-point series to a 0-d array instead of shape (1,).
        return np.asarray(ts).reshape(-1)

    def generate_time_series(self, random_seed: int | None = None) -> np.ndarray:
        """
        Generate a single independent univariate time series.

        Parameters
        ----------
        random_seed : int, optional
            Random seed for reproducible generation. When given, ``self.rng``
            is replaced by a generator seeded with this value, and the same
            value seeds the GP draw. When omitted, the current ``self.rng``
            stream is used for both kernel selection and the GP draw.

        Returns
        -------
        np.ndarray
            Shape: [seq_len]

        Raises
        ------
        numpy.linalg.LinAlgError
            If sampling fails numerically on every one of the
            ``_MAX_ATTEMPTS`` attempts.
        """
        if random_seed is not None:
            self.rng = np.random.default_rng(random_seed)

        X = np.linspace(0, 1, self.length)
        seed = random_seed
        last_error: Exception | None = None

        # Iterative retry (instead of recursion) so repeated numerical
        # failures cannot end in a RecursionError.
        for _ in range(self._MAX_ATTEMPTS):
            num_kernels = self.rng.integers(1, self.max_kernels + 1)
            selected = self.rng.choice(self.kernel_bank, num_kernels, replace=True)
            composite = functools.reduce(self._random_binary_map, selected)
            try:
                return self._sample_from_gp_prior(composite, X, random_seed=seed)
            except np.linalg.LinAlgError as err:
                last_error = err
                if seed is not None:
                    # Seeded path: move to the next seed and restart the
                    # generator from it, so the retry is itself reproducible.
                    seed += 1
                    self.rng = np.random.default_rng(seed)
                # Unseeded path: self.rng has already advanced, so the next
                # iteration naturally picks a different kernel and draw seed.

        raise np.linalg.LinAlgError(
            f"GP prior sampling failed after {self._MAX_ATTEMPTS} attempts"
        ) from last_error