"""
Hawkes Processes and Point Process Models for Conflict Modeling
Hawkes processes are self-exciting point processes where past events increase
the probability of future events. Critical for modeling:
- Conflict contagion and escalation dynamics
- Terrorist attack clustering
- Diplomatic incident cascades
- Arms race dynamics
- Protest contagion
Mathematical foundation:
λ(t) = μ + ∫_{-∞}^t φ(t - s) dN(s)
where:
- λ(t): instantaneous event rate (intensity)
- μ: baseline rate
- φ(t): excitation kernel (how past events affect current rate)
- N(s): counting process of past events
Key concepts:
- Branching ratio: Expected number of offspring events per parent
- If branching ratio < 1: process is stable (subcritical)
- If branching ratio ≥ 1: process is explosive (supercritical)
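Worked example: alpha = 0.8, beta = 1.5 gives branching ratio n = alpha/beta
≈ 0.53, a subcritical process in which each event triggers about 0.53 direct
offspring events on average.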
"""
import numpy as np
from scipy import optimize
from typing import List, Tuple, Optional, Dict
from dataclasses import dataclass
import warnings
@dataclass
class HawkesParameters:
"""Parameters for a Hawkes process."""
mu: float # Baseline intensity
alpha: float # Excitation amplitude
beta: float # Decay rate
@property
def branching_ratio(self) -> float:
"""Expected number of offspring per event."""
return self.alpha / self.beta
@property
def is_stable(self) -> bool:
"""Check if process is subcritical (stable)."""
return self.branching_ratio < 1.0
@dataclass
class HawkesFitResult:
"""Results from fitting a Hawkes process."""
params: HawkesParameters
log_likelihood: float
aic: float
bic: float
n_events: int
time_span: float
intensity_trace: Optional[np.ndarray] = None
times: Optional[np.ndarray] = None
class UnivariateHawkesProcess:
"""
Univariate (1-dimensional) Hawkes Process.
Intensity function:
λ(t) = μ + α ∑_{t_i < t} exp(-β(t - t_i))
This is a self-exciting process where each event increases future intensity.
Example:
>>> hawkes = UnivariateHawkesProcess()
>>> events = hawkes.simulate(mu=0.5, alpha=0.8, beta=1.5, T=100.0)
>>> result = hawkes.fit(events, T=100.0)
>>> print(f"Branching ratio: {result.params.branching_ratio:.3f}")
>>> prediction = hawkes.predict_intensity(events, result.params, t=105.0)
"""
def __init__(self, kernel: str = 'exponential'):
"""
Initialize Hawkes process.
Args:
kernel: Excitation kernel type ('exponential', 'power_law')
"""
self.kernel = kernel
def simulate(self, mu: float, alpha: float, beta: float, T: float,
max_events: int = 10000) -> np.ndarray:
"""
Simulate Hawkes process using Ogata's thinning algorithm.
Args:
mu: Baseline intensity
alpha: Excitation amplitude
beta: Decay rate
T: Time horizon
max_events: Maximum number of events to generate
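        Note:
            For a stationary subcritical process the long-run event rate is
            mu / (1 - alpha/beta); e.g. mu=0.5, alpha=0.8, beta=1.5 gives
            roughly 1.07 events per unit time, so T=100 yields about 107
            events in expectation.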
Returns:
Array of event times
"""
events = []
t = 0.0
        while t < T and len(events) < max_events:
            # Upper bound for thinning: the intensity just after time t.
            # _compute_intensity uses a strict 't_i < t', which drops the jump
            # from an event accepted at exactly t, so add that jump back in.
            # The exponential kernel then only decays until the next candidate,
            # making lambda_star a valid dominating rate.
            lambda_star = self._compute_intensity(t, events, mu, alpha, beta)
            if events and events[-1] == t:
                lambda_star += alpha
# Draw inter-event time from exponential
u = np.random.uniform()
if lambda_star <= 0:
break
t = t - np.log(u) / lambda_star
if t > T:
break
# Acceptance-rejection
lambda_t = self._compute_intensity(t, events, mu, alpha, beta)
D = np.random.uniform()
if D * lambda_star <= lambda_t:
events.append(t)
return np.array(events)
def fit(self, events: np.ndarray, T: float,
initial_guess: Optional[Tuple[float, float, float]] = None) -> HawkesFitResult:
"""
Fit Hawkes process parameters using maximum likelihood.
Args:
events: Array of event times
T: Time horizon (observation period end)
initial_guess: Initial parameter guess (mu, alpha, beta)
Returns:
HawkesFitResult with estimated parameters
"""
events = np.asarray(events)
events = np.sort(events)
n_events = len(events)
if initial_guess is None:
            # Initialize with reasonable defaults. Note alpha/beta must start
            # below 1, otherwise the stability penalty flattens the objective
            # at x0 and the optimizer cannot move.
            mu_init = max(n_events / T, 1e-3)  # Average observed rate
            beta_init = 1.0
            alpha_init = 0.5 * beta_init  # Branching ratio 0.5: safely subcritical
            initial_guess = (mu_init, alpha_init, beta_init)
# Define negative log-likelihood
def neg_log_likelihood(params):
mu, alpha, beta = params
# Constrain to positive values
if mu <= 0 or alpha <= 0 or beta <= 0:
return 1e10
# Check stability
if alpha / beta >= 1.0:
return 1e10 # Penalize explosive processes
return -self._log_likelihood(events, T, mu, alpha, beta)
# Optimize
bounds = [(1e-6, None), (1e-6, None), (1e-6, None)]
result = optimize.minimize(
neg_log_likelihood,
x0=initial_guess,
method='L-BFGS-B',
bounds=bounds
)
if not result.success:
warnings.warn(f"Optimization did not converge: {result.message}")
mu_opt, alpha_opt, beta_opt = result.x
log_likelihood = -result.fun
# Compute information criteria
n_params = 3
aic = -2 * log_likelihood + 2 * n_params
bic = -2 * log_likelihood + np.log(n_events) * n_params
params = HawkesParameters(mu=mu_opt, alpha=alpha_opt, beta=beta_opt)
return HawkesFitResult(
params=params,
log_likelihood=log_likelihood,
aic=aic,
bic=bic,
n_events=n_events,
time_span=T
)
def predict_intensity(self, events: np.ndarray, params: HawkesParameters,
t: float) -> float:
"""
Predict intensity at time t given past events.
Args:
events: Past event times (must be < t)
params: Hawkes parameters
t: Time to predict intensity
Returns:
Intensity λ(t)
"""
return self._compute_intensity(t, events, params.mu, params.alpha, params.beta)
def _compute_intensity(self, t: float, events: List[float],
mu: float, alpha: float, beta: float) -> float:
"""Compute intensity at time t."""
if len(events) == 0:
return mu
events_array = np.asarray(events)
past_events = events_array[events_array < t]
if len(past_events) == 0:
return mu
# Exponential kernel
excitation = alpha * np.sum(np.exp(-beta * (t - past_events)))
return mu + excitation
def _log_likelihood(self, events: np.ndarray, T: float,
mu: float, alpha: float, beta: float) -> float:
"""
Compute log-likelihood for Hawkes process.
LL = ∑_i log(λ(t_i)) - ∫_0^T λ(s) ds
"""
n_events = len(events)
if n_events == 0:
return -mu * T
# First term: ∑ log(λ(t_i))
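        # Note: this loop is O(n^2). For the exponential kernel the inner sum
        # obeys the recursion R_i = exp(-beta * (t_i - t_{i-1})) * (1 + R_{i-1}),
        # which gives an O(n) likelihood and is worth adopting for large n.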
log_sum = 0.0
for i, t_i in enumerate(events):
lambda_i = self._compute_intensity(t_i, events[:i], mu, alpha, beta)
if lambda_i <= 0:
return -np.inf
log_sum += np.log(lambda_i)
# Second term: ∫_0^T λ(s) ds
# For exponential kernel, this has closed form:
# ∫_0^T λ(s) ds = μT + α ∑_i (1 - exp(-β(T - t_i))) / β
integral = mu * T
integral += alpha * np.sum(1 - np.exp(-beta * (T - events))) / beta
return log_sum - integral
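

# A goodness-of-fit sketch added alongside the original class (not part of its
# API): by the time-rescaling theorem, if the fitted model is correct, the
# transformed inter-event times Λ(t_i) - Λ(t_{i-1}) are i.i.d. Exp(1); plot or
# KS-test the returned residuals against that distribution to validate a fit.
def time_rescaling_residuals(events: np.ndarray, params: HawkesParameters) -> np.ndarray:
    """Transformed inter-event times; approximately Exp(1) under a correct model."""
    events = np.sort(np.asarray(events))
    # Closed-form compensator for the exponential kernel:
    # Λ(t) = mu*t + (alpha/beta) * Σ_{t_j < t} (1 - exp(-beta * (t - t_j)))
    compensator = np.array([
        params.mu * t + (params.alpha / params.beta)
        * np.sum(1.0 - np.exp(-params.beta * (t - events[events < t])))
        for t in events
    ])
    return np.diff(compensator)
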
class MultivariateHawkesProcess:
"""
Multivariate Hawkes Process for multiple interacting event streams.
For K event types, the intensity of type k is:
λ_k(t) = μ_k + ∑_{j=1}^K α_{kj} ∑_{t_i^j < t} φ_{kj}(t - t_i^j)
This captures cross-excitation between different event types.
Example: Conflict in country A affects conflict probability in country B.
Example:
>>> # Model 3 countries with mutual excitation
>>> hawkes = MultivariateHawkesProcess(n_dimensions=3)
>>> events = hawkes.simulate(
... mu=np.array([0.5, 0.3, 0.4]),
... alpha=np.array([[0.2, 0.1, 0.05],
... [0.15, 0.3, 0.1],
... [0.1, 0.1, 0.25]]),
... beta=np.ones((3, 3)),
... T=100.0
... )
>>> result = hawkes.fit(events, T=100.0)
"""
def __init__(self, n_dimensions: int, kernel: str = 'exponential'):
"""
Initialize multivariate Hawkes process.
Args:
n_dimensions: Number of event types (dimensions)
kernel: Excitation kernel type
"""
self.n_dimensions = n_dimensions
self.kernel = kernel
def simulate(self, mu: np.ndarray, alpha: np.ndarray, beta: np.ndarray,
T: float, max_events: int = 10000) -> List[List[float]]:
"""
Simulate multivariate Hawkes process.
Args:
mu: Baseline intensities, shape (K,)
alpha: Excitation matrix, shape (K, K)
alpha[i,j] = effect of event type j on type i
beta: Decay rates, shape (K, K)
T: Time horizon
max_events: Maximum total events
Returns:
List of event lists, one per dimension
"""
K = self.n_dimensions
events = [[] for _ in range(K)]
total_events = 0
t = 0.0
        last_event = None  # (time, dimension) of the most recently accepted event
        while t < T and total_events < max_events:
            # Upper bound for thinning: total intensity just after time t.
            # _compute_intensities uses a strict 't_i < t', which drops the jump
            # from an event accepted at exactly t, so add that jump back in.
            intensities = self._compute_intensities(t, events, mu, alpha, beta)
            lambda_star = np.sum(intensities)
            if last_event is not None and last_event[0] == t:
                lambda_star += np.sum(alpha[:, last_event[1]])
# Generate candidate event
u = np.random.uniform()
t = t - np.log(u) / lambda_star
if t > T:
break
            # Acceptance-rejection: evaluate the true intensities at the candidate time
intensities_t = self._compute_intensities(t, events, mu, alpha, beta)
total_intensity = np.sum(intensities_t)
D = np.random.uniform()
if D * lambda_star <= total_intensity:
# Accept event, determine which dimension
probs = intensities_t / total_intensity
dimension = np.random.choice(K, p=probs)
                events[dimension].append(t)
                last_event = (t, dimension)
                total_events += 1
return events
def fit(self, events: List[List[float]], T: float) -> Dict:
"""
Fit multivariate Hawkes process.
Args:
events: List of event lists, one per dimension
T: Time horizon
Returns:
Dictionary with estimated parameters
"""
K = self.n_dimensions
# Convert to arrays
events_arrays = [np.asarray(e) for e in events]
# Initialize parameters
n_events = [len(e) for e in events_arrays]
mu_init = np.array([n / T for n in n_events])
# Simple initialization: assume weak cross-excitation
alpha_init = np.zeros((K, K))
for i in range(K):
alpha_init[i, i] = mu_init[i] * 0.3 # Self-excitation
for j in range(K):
if i != j:
alpha_init[i, j] = mu_init[i] * 0.1 # Cross-excitation
beta_init = np.ones((K, K))
# Flatten parameters for optimization
def pack_params(mu, alpha, beta):
return np.concatenate([mu.flatten(), alpha.flatten(), beta.flatten()])
def unpack_params(x):
mu = x[:K]
alpha = x[K:K + K*K].reshape(K, K)
beta = x[K + K*K:].reshape(K, K)
return mu, alpha, beta
# Negative log-likelihood
def neg_log_likelihood(x):
mu, alpha, beta = unpack_params(x)
# Constraints
if np.any(mu <= 0) or np.any(alpha < 0) or np.any(beta <= 0):
return 1e10
            # Stability: the spectral radius of the branching matrix must be < 1
            branching_ratios = alpha / beta
            if np.max(np.abs(np.linalg.eigvals(branching_ratios))) >= 0.99:
return 1e10
return -self._log_likelihood(events_arrays, T, mu, alpha, beta)
# Optimize
x0 = pack_params(mu_init, alpha_init, beta_init)
bounds = [(1e-6, None)] * len(x0) # All positive
result = optimize.minimize(
neg_log_likelihood,
x0=x0,
method='L-BFGS-B',
bounds=bounds
)
mu_opt, alpha_opt, beta_opt = unpack_params(result.x)
log_likelihood = -result.fun
# Information criteria
n_params = len(x0)
total_events = sum(n_events)
aic = -2 * log_likelihood + 2 * n_params
bic = -2 * log_likelihood + np.log(total_events) * n_params
return {
'mu': mu_opt,
'alpha': alpha_opt,
'beta': beta_opt,
'branching_matrix': alpha_opt / beta_opt,
'spectral_radius': np.max(np.abs(np.linalg.eigvals(alpha_opt / beta_opt))),
'log_likelihood': log_likelihood,
'aic': aic,
'bic': bic,
'n_events': n_events,
'converged': result.success
}
def predict_intensities(self, events: List[List[float]],
mu: np.ndarray, alpha: np.ndarray, beta: np.ndarray,
t: float) -> np.ndarray:
"""
Predict intensities for all dimensions at time t.
Args:
events: Past events
mu, alpha, beta: Parameters
t: Time to predict
Returns:
Intensity vector, shape (K,)
"""
return self._compute_intensities(t, events, mu, alpha, beta)
def _compute_intensities(self, t: float, events: List[List[float]],
mu: np.ndarray, alpha: np.ndarray,
beta: np.ndarray) -> np.ndarray:
"""Compute intensity vector at time t."""
K = self.n_dimensions
intensities = mu.copy()
for k in range(K):
for j in range(K):
if len(events[j]) > 0:
events_j = np.asarray(events[j])
past_events = events_j[events_j < t]
if len(past_events) > 0:
excitation = alpha[k, j] * np.sum(
np.exp(-beta[k, j] * (t - past_events))
)
intensities[k] += excitation
return intensities
def _log_likelihood(self, events: List[np.ndarray], T: float,
mu: np.ndarray, alpha: np.ndarray,
beta: np.ndarray) -> float:
"""Compute log-likelihood for multivariate process."""
K = self.n_dimensions
log_sum = 0.0
# First term: ∑_k ∑_i log(λ_k(t_i^k))
for k in range(K):
if len(events[k]) == 0:
continue
for i, t_i in enumerate(events[k]):
# Need to compute λ_k(t_i) considering all past events
                events_up_to_i = [events[j][events[j] < t_i].tolist()
                                  for j in range(K)]
intensities = self._compute_intensities(t_i, events_up_to_i, mu, alpha, beta)
lambda_k = intensities[k]
if lambda_k <= 0:
return -np.inf
log_sum += np.log(lambda_k)
# Second term: -∫_0^T ∑_k λ_k(s) ds
integral = np.sum(mu) * T
for k in range(K):
for j in range(K):
if len(events[j]) > 0:
integral += alpha[k, j] * np.sum(
(1 - np.exp(-beta[k, j] * (T - events[j]))) / beta[k, j]
)
return log_sum - integral
class ConflictContagionModel:
"""
Specialized Hawkes model for geopolitical conflict contagion.
    Features:
    - Self-excitation (conflict escalation within a country)
    - Cross-excitation (conflict spreading between countries); the fitted
      excitation matrix implicitly defines a directed contagion network
    - Contagion risk estimates and identification of contagion pathways
      as early-warning indicators
Example:
>>> countries = ['Syria', 'Iraq', 'Turkey']
>>> model = ConflictContagionModel(countries=countries)
>>>
>>> # Fit to historical conflict events
>>> events = {
... 'Syria': [1.2, 5.3, 10.1, ...],
... 'Iraq': [3.4, 8.9, ...],
... 'Turkey': [12.3, ...]
... }
>>> result = model.fit(events, T=365.0) # 1 year
>>>
>>> # Predict contagion risk
>>> risk = model.contagion_risk(events, result, t=370.0)
>>> print(f"Syria conflict risk in next 5 days: {risk['Syria']:.2%}")
"""
def __init__(self, countries: List[str]):
"""
Initialize conflict contagion model.
Args:
countries: List of country names
"""
self.countries = countries
self.n_countries = len(countries)
self.hawkes = MultivariateHawkesProcess(n_dimensions=self.n_countries)
def fit(self, events: Dict[str, List[float]], T: float) -> Dict:
"""
Fit contagion model to conflict events.
Args:
events: Dictionary mapping country name to list of event times
T: Observation period
Returns:
Fitted parameters with interpretation
"""
# Convert to list format
events_list = [events[country] for country in self.countries]
# Fit multivariate Hawkes
result = self.hawkes.fit(events_list, T)
# Add interpretations
result['countries'] = self.countries
result['self_excitation'] = np.diag(result['alpha'])
result['cross_excitation_mean'] = np.mean(
result['alpha'][~np.eye(self.n_countries, dtype=bool)]
)
        # Identify most contagious countries via the branching matrix (expected
        # offspring counts): raw alpha amplitudes ignore how fast each kernel
        # decays. Entry [i, j] is the effect of country j on country i.
        branching = result['branching_matrix']
        outgoing_contagion = np.sum(branching, axis=0) - np.diag(branching)
        incoming_contagion = np.sum(branching, axis=1) - np.diag(branching)
result['most_contagious_source'] = self.countries[np.argmax(outgoing_contagion)]
result['most_vulnerable_target'] = self.countries[np.argmax(incoming_contagion)]
return result
def contagion_risk(self, events: Dict[str, List[float]],
params: Dict, t: float, horizon: float = 5.0) -> Dict[str, float]:
"""
Estimate contagion risk over next time period.
Args:
events: Historical events
params: Fitted parameters
t: Current time
horizon: Risk horizon (time units)
Returns:
Dictionary mapping country to probability of conflict
"""
events_list = [events[country] for country in self.countries]
# Compute current intensities
intensities = self.hawkes.predict_intensities(
events_list, params['mu'], params['alpha'], params['beta'], t
)
# Probability of at least one event in [t, t+horizon]
# P(N(t+h) - N(t) ≥ 1) = 1 - P(N(t+h) - N(t) = 0)
# Approximate with constant intensity
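        # Worked example: intensity 0.2 events/day over a 5-day horizon gives
        # E[N] = 1.0, so P(at least one event) = 1 - exp(-1) ≈ 0.63.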
risks = {}
for i, country in enumerate(self.countries):
# Poisson approximation
expected_events = intensities[i] * horizon
prob_no_event = np.exp(-expected_events)
prob_at_least_one = 1 - prob_no_event
risks[country] = prob_at_least_one
return risks
def identify_contagion_pathways(self, params: Dict, threshold: float = 0.1) -> List[Tuple[str, str, float]]:
"""
Identify significant contagion pathways between countries.
Args:
params: Fitted parameters
threshold: Minimum branching ratio to report
Returns:
List of (source, target, branching_ratio) tuples
"""
alpha = params['alpha']
beta = params['beta']
branching = alpha / beta
pathways = []
for i in range(self.n_countries):
for j in range(self.n_countries):
if i != j and branching[i, j] > threshold:
pathways.append((
self.countries[j], # Source
self.countries[i], # Target
branching[i, j]
))
# Sort by strength
pathways.sort(key=lambda x: x[2], reverse=True)
return pathways
def estimate_branching_ratio(events: np.ndarray, T: float) -> float:
"""
Quick estimate of branching ratio for stability assessment.
Args:
events: Event times
T: Time horizon
Returns:
Estimated branching ratio
"""
hawkes = UnivariateHawkesProcess()
result = hawkes.fit(events, T)
return result.params.branching_ratio
def detect_explosive_regime(events: np.ndarray, T: float, window: float = 10.0) -> List[Tuple[float, float]]:
"""
    Detect time windows where the process approached criticality.

    Note: the MLE in UnivariateHawkesProcess.fit constrains the branching
    ratio below 1, so this flags near-critical windows rather than directly
    measuring supercritical behavior.
Args:
events: Event times
T: Total time horizon
window: Rolling window size
Returns:
        List of (window_end_time, branching_ratio) for near-critical windows
"""
events = np.sort(events)
explosive_periods = []
t = window
while t <= T:
# Events in window [t-window, t]
window_events = events[(events >= t - window) & (events <= t)]
if len(window_events) > 5: # Need minimum events
br = estimate_branching_ratio(window_events - (t - window), window)
            if br >= 0.9:  # Near-critical (the fit caps the ratio below 1)
explosive_periods.append((t, br))
t += window / 2 # Overlapping windows
return explosive_periods
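

# A minimal end-to-end sketch (illustrative parameter values, not from any
# dataset): simulate a subcritical univariate process, refit it by MLE, and
# compare the recovered branching ratio against the truth.
if __name__ == "__main__":
    np.random.seed(42)
    hawkes = UnivariateHawkesProcess()
    true_params = HawkesParameters(mu=0.5, alpha=0.8, beta=1.5)
    events = hawkes.simulate(mu=true_params.mu, alpha=true_params.alpha,
                             beta=true_params.beta, T=500.0)
    result = hawkes.fit(events, T=500.0)
    print(f"Simulated {result.n_events} events over T={result.time_span}")
    print(f"True branching ratio:   {true_params.branching_ratio:.3f}")
    print(f"Fitted branching ratio: {result.params.branching_ratio:.3f}")
    print(f"Stable fit: {result.params.is_stable}, AIC: {result.aic:.1f}")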