"""
Example 5: Complete GeoBotv1 Framework - Final Features
This example demonstrates the final critical components that complete GeoBotv1
to 100% research-grade capability:
1. Vector Autoregression (VAR/SVAR/DFM) - Econometric time-series analysis
2. Hawkes Processes - Conflict contagion and self-exciting dynamics
3. Quasi-Experimental Methods - Causal inference without randomization
- Synthetic Control Method (SCM)
- Difference-in-Differences (DiD)
- Regression Discontinuity Design (RDD)
- Instrumental Variables (IV)
These methods are essential for:
- Multi-country forecasting with spillovers (VAR)
- Modeling conflict escalation and contagion (Hawkes)
- Estimating policy effects and counterfactuals (quasi-experimental)
GeoBotv1 is now COMPLETE with all research-grade mathematical components!
"""
import numpy as np
import sys
sys.path.append('..')
from datetime import datetime, timedelta
# Time-series models
from geobot.timeseries import (
VARModel,
SVARModel,
DynamicFactorModel,
GrangerCausality,
UnivariateHawkesProcess,
MultivariateHawkesProcess,
ConflictContagionModel
)
# Quasi-experimental methods
from geobot.models import (
SyntheticControlMethod,
DifferenceinDifferences,
RegressionDiscontinuity,
InstrumentalVariables
)
def demo_var_model():
"""Demonstrate Vector Autoregression for multi-country forecasting."""
print("\n" + "="*80)
print("1. Vector Autoregression (VAR) - Multi-Country Spillovers")
print("="*80)
# Simulate data for 3 countries
# Country dynamics with interdependencies
np.random.seed(42)
T = 100
n_vars = 3
# Generate VAR(2) data
# Y_t = A_1 Y_{t-1} + A_2 Y_{t-2} + noise
A1 = np.array([
[0.5, 0.2, 0.1], # Country 1: affected by all
[0.1, 0.6, 0.15], # Country 2: strong self-dependence
[0.05, 0.1, 0.55] # Country 3: weak spillovers
])
A2 = np.array([
[0.2, 0.05, 0.0],
[0.1, 0.1, 0.05],
[0.0, 0.05, 0.2]
])
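# Added illustration (plain NumPy, not the geobot API): stack (A1, A2) into the
# VAR(2) companion matrix and report its spectral radius. Eigenvalues with
# modulus below 1 would imply a stationary data-generating process, so this
# makes the persistence of the simulated system explicit.
companion = np.block([
[A1, A2],
[np.eye(n_vars), np.zeros((n_vars, n_vars))]
])
dgp_spectral_radius = np.max(np.abs(np.linalg.eigvals(companion)))
print(f"Companion-matrix spectral radius of the simulated DGP: {dgp_spectral_radius:.3f}")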
# Simulate
data = np.zeros((T, n_vars))
data[0] = np.random.randn(n_vars) * 0.1
data[1] = np.random.randn(n_vars) * 0.1
for t in range(2, T):
data[t] = (A1 @ data[t-1] + A2 @ data[t-2] +
np.random.randn(n_vars) * 0.1)
print(f"\nSimulated {T} time periods for {n_vars} countries")
print(f"Variables: GDP growth, Military spending, Stability index\n")
# Fit VAR model
var = VARModel(n_lags=2)
variable_names = ['GDP_growth', 'Military_spend', 'Stability']
results = var.fit(data, variable_names)
print(f"VAR({results.n_lags}) Estimation Results:")
print(f" Log-likelihood: {results.log_likelihood:.2f}")
print(f" AIC: {results.aic:.2f}")
print(f" BIC: {results.bic:.2f}")
# Forecast
forecast = var.forecast(results, steps=10)
print(f"\n10-step ahead forecast:")
print(f" GDP growth: {forecast[-1, 0]:.3f}")
print(f" Military spending: {forecast[-1, 1]:.3f}")
print(f" Stability: {forecast[-1, 2]:.3f}")
# Granger causality
print("\nGranger Causality Tests:")
for i in range(n_vars):
for j in range(n_vars):
if i != j:
gc_result = var.granger_causality(results, i, j)
if gc_result['p_value'] < 0.05:
print(f" {variable_names[j]} β {variable_names[i]}: "
f"F={gc_result['f_statistic']:.2f}, p={gc_result['p_value']:.3f} β")
# Impulse response functions
irf_result = var.impulse_response(results, steps=10)
print("\nImpulse Response Functions computed (10 steps)")
print(f" Shock to Military spending β GDP growth at t=5: {irf_result.irf[0, 1, 5]:.4f}")
# Forecast error variance decomposition
fevd = var.forecast_error_variance_decomposition(results, steps=10)
print("\nForecast Error Variance Decomposition (horizon=10):")
for i, var_name in enumerate(variable_names):
contributions = fevd[i, :, -1]
print(f" {var_name} variance explained by:")
for j, source_name in enumerate(variable_names):
print(f" {source_name}: {contributions[j]:.1%}")
print("\nβ VAR model demonstrates multi-country interdependencies!")
def demo_hawkes_process():
"""Demonstrate Hawkes processes for conflict contagion."""
print("\n" + "="*80)
print("2. Hawkes Processes - Conflict Escalation and Contagion")
print("="*80)
# Simulate conflict events
print("\nSimulating conflict events with self-excitation...")
hawkes = UnivariateHawkesProcess()
# Parameters: baseline=0.3, excitation=0.6, decay=1.2
# Branching ratio = 0.6/1.2 = 0.5 (stable, subcritical)
events = hawkes.simulate(mu=0.3, alpha=0.6, beta=1.2, T=100.0)
print(f"Generated {len(events)} conflict events over 100 time units")
print(f"Average rate: {len(events) / 100.0:.2f} events/unit\n")
# Fit model
result = hawkes.fit(events, T=100.0)
print("Estimated Hawkes Parameters:")
print(f" Baseline intensity (ΞΌ): {result.params.mu:.3f}")
print(f" Excitation (Ξ±): {result.params.alpha:.3f}")
print(f" Decay rate (Ξ²): {result.params.beta:.3f}")
print(f" Branching ratio: {result.params.branching_ratio:.3f}")
print(f" Process is {'STABLE' if result.params.is_stable else 'EXPLOSIVE'}")
# Predict intensity
t_future = 105.0
intensity = hawkes.predict_intensity(events, result.params, t_future)
print(f"\nPredicted conflict intensity at t={t_future}: {intensity:.3f}")
# Multivariate: conflict contagion between countries
print("\n" + "-"*80)
print("Multivariate Hawkes: Cross-Country Conflict Contagion")
print("-"*80)
countries = ['Syria', 'Iraq', 'Lebanon']
contagion_model = ConflictContagionModel(countries=countries)
# Simulate with cross-excitation
mu = np.array([0.5, 0.3, 0.2]) # Different baseline rates
alpha = np.array([
[0.3, 0.15, 0.1], # Syria: high self-excitation, moderate contagion
[0.2, 0.25, 0.1], # Iraq: affected by Syria
[0.15, 0.1, 0.2] # Lebanon: affected by both
])
beta = np.ones((3, 3)) * 1.5
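# Added stability note (NumPy only): for a multivariate Hawkes process with
# exponential kernels, the matrix of expected offspring counts is alpha / beta
# (elementwise), and a spectral radius below 1 keeps the cascade subcritical.
# Printing it for the true parameters gives a benchmark for the fitted
# spectral radius reported further down.
true_branching = alpha / beta
true_spectral_radius = np.max(np.abs(np.linalg.eigvals(true_branching)))
print(f"\nSpectral radius of true branching matrix: {true_spectral_radius:.3f}")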
multi_hawkes = MultivariateHawkesProcess(n_dimensions=3)
events_multi = multi_hawkes.simulate(mu=mu, alpha=alpha, beta=beta, T=100.0)
print(f"\nSimulated events:")
for i, country in enumerate(countries):
print(f" {country}: {len(events_multi[i])} events")
# Fit multivariate model
events_dict = {country: events_multi[i] for i, country in enumerate(countries)}
fit_result = contagion_model.fit(events_dict, T=100.0)
print(f"\nFitted contagion model:")
print(f" Spectral radius: {fit_result['spectral_radius']:.3f} (< 1 = stable)")
print(f" Most contagious source: {fit_result['most_contagious_source']}")
print(f" Most vulnerable target: {fit_result['most_vulnerable_target']}")
# Identify contagion pathways
pathways = contagion_model.identify_contagion_pathways(fit_result, threshold=0.1)
print("\nSignificant contagion pathways (branching ratio > 0.1):")
for source, target, strength in pathways[:5]:
print(f" {source} β {target}: {strength:.3f}")
# Risk assessment
risks = contagion_model.contagion_risk(events_dict, fit_result, t=105.0, horizon=5.0)
print("\nConflict risk over next 5 time units:")
for country, risk in risks.items():
print(f" {country}: {risk:.1%}")
print("\nβ Hawkes processes capture conflict escalation dynamics!")
def demo_synthetic_control():
"""Demonstrate Synthetic Control Method."""
print("\n" + "="*80)
print("3. Synthetic Control Method - Policy Impact Estimation")
print("="*80)
# Scenario: Estimate effect of sanctions on target country's GDP
print("\nScenario: Economic sanctions imposed on Country A at t=50")
print("Question: What is the causal effect on GDP growth?\n")
# Generate data
np.random.seed(42)
T = 100
J = 10 # 10 control countries
# Pre-treatment: all countries follow similar trends
time = np.arange(T)
trend = 0.02 * time + np.random.randn(T) * 0.1
# Control countries
control_outcomes = np.zeros((T, J))
for j in range(J):
control_outcomes[:, j] = trend + np.random.randn(T) * 0.15 + np.random.randn() * 0.5
# Treated country (matches controls pre-treatment)
treated_outcome = trend + np.random.randn(T) * 0.15
# Treatment effect: negative shock starting at t=50
treatment_time = 50
true_effect = -0.8
treated_outcome[treatment_time:] += true_effect + np.random.randn(T - treatment_time) * 0.1
# Fit SCM
scm = SyntheticControlMethod()
result = scm.fit(
treated_outcome=treated_outcome,
control_outcomes=control_outcomes,
treatment_time=treatment_time,
control_names=[f"Country_{j+1}" for j in range(J)]
)
print("Synthetic Control Results:")
print(f" Pre-treatment fit (RMSPE): {result.pre_treatment_fit:.4f}")
print(f"\nSynthetic Country A is weighted combination of:")
for j, weight in enumerate(result.weights):
if weight > 0.01: # Only show significant weights
print(f" {result.control_units[j]}: {weight:.1%}")
# Treatment effects
avg_effect = np.mean(result.treatment_effect[treatment_time:])
print(f"\nEstimated treatment effect (post-sanctions):")
print(f" Average: {avg_effect:.3f} (true effect: {true_effect:.3f})")
print(f" Final period: {result.treatment_effect[-1]:.3f}")
# Placebo test
p_value = scm.placebo_test(treated_outcome, control_outcomes, treatment_time, n_permutations=J)
print(f"\nPlacebo test p-value: {p_value:.3f}")
if p_value < 0.05:
print(" β Effect is statistically significant (unusual compared to placebos)")
else:
print(" β Effect not significant (could be random)")
print("\nβ Synthetic control provides credible counterfactual!")
def demo_difference_in_differences():
"""Demonstrate Difference-in-Differences."""
print("\n" + "="*80)
print("4. Difference-in-Differences (DiD) - Regime Change Analysis")
print("="*80)
# Scenario: Regime change in treated country
print("\nScenario: Regime change in Country T at t=50")
print("Compare to similar countries without regime change\n")
np.random.seed(42)
# Pre-treatment (similar trends)
treated_pre = 3.0 + np.random.randn(50) * 0.5
control_pre = 3.2 + np.random.randn(50) * 0.5
# Post-treatment (treatment effect = +1.5 on outcome)
true_effect = 1.5
treated_post = 3.0 + true_effect + np.random.randn(50) * 0.5
control_post = 3.2 + np.random.randn(50) * 0.5 # No effect
# Estimate DiD
did = DifferenceinDifferences()
result = did.estimate(treated_pre, treated_post, control_pre, control_post)
print("Difference-in-Differences Results:")
print(f"\n Pre-treatment difference: {result.pre_treatment_diff:.3f}")
print(f" Post-treatment difference: {result.post_treatment_diff:.3f}")
print(f"\n Average Treatment Effect (ATT): {result.att:.3f}")
print(f" Standard error: {result.se:.3f}")
print(f" t-statistic: {result.t_stat:.3f}")
print(f" p-value: {result.p_value:.4f}")
if result.p_value < 0.05:
print(f"\n β Regime change had significant effect (true effect: {true_effect:.3f})")
else:
print("\n β Effect not statistically significant")
# Assumption check
if abs(result.pre_treatment_diff) < 0.5:
print("\n β Parallel trends assumption plausible (small pre-treatment diff)")
else:
print("\n β Parallel trends questionable (large pre-treatment diff)")
print("\nβ DiD isolates causal effect of regime change!")
def demo_regression_discontinuity():
"""Demonstrate Regression Discontinuity Design."""
print("\n" + "="*80)
print("5. Regression Discontinuity Design (RDD) - Election Effects")
print("="*80)
# Scenario: Effect of winning election on military policy
print("\nScenario: Effect of hawkish candidate winning election")
print("Running variable: Vote share (cutoff = 50%)")
print("Outcome: Military spending increase\n")
np.random.seed(42)
n = 500
# Vote share (running variable)
vote_share = np.random.uniform(0.3, 0.7, n)
# Outcome: military spending
# Smooth function of vote share + discontinuity at 50%
outcome = 2.0 + 1.5 * vote_share + np.random.randn(n) * 0.3
# Treatment effect: +0.8 if vote > 50%
true_effect = 0.8
outcome[vote_share >= 0.5] += true_effect
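# Added eyeball check before fitting (NumPy only, not the RDD estimator):
# compare raw outcome means in narrow windows just below and just above the
# cutoff. The raw jump mixes the treatment effect with the outcome's slope in
# vote share; the local-linear estimator used below is meant to net that out.
window = 0.05
below_mean = outcome[(vote_share >= 0.5 - window) & (vote_share < 0.5)].mean()
above_mean = outcome[(vote_share >= 0.5) & (vote_share < 0.5 + window)].mean()
print(f"Raw mean jump within ±{window:.0%} of the cutoff: {above_mean - below_mean:.3f}")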
# Estimate RDD
rdd = RegressionDiscontinuity(cutoff=0.5)
result = rdd.estimate_sharp(
running_var=vote_share,
outcome=outcome,
bandwidth=0.15, # ±15 percentage points of vote share around the cutoff
kernel='triangular'
)
print("Regression Discontinuity Results:")
print(f"\n Bandwidth: {result.bandwidth:.3f}")
print(f" Observations below cutoff: {result.n_left}")
print(f" Observations above cutoff: {result.n_right}")
print(f"\n Treatment effect (LATE): {result.treatment_effect:.3f}")
print(f" Standard error: {result.se:.3f}")
print(f" t-statistic: {result.t_stat:.3f}")
print(f" p-value: {result.p_value:.4f}")
if result.p_value < 0.05:
print(f"\n β Winning election causes increase in military spending")
print(f" (true effect: {true_effect:.3f})")
else:
print("\n β Effect not statistically significant")
print("\nβ RDD exploits threshold-based treatment assignment!")
def demo_instrumental_variables():
"""Demonstrate Instrumental Variables."""
print("\n" + "="*80)
print("6. Instrumental Variables (IV) - Trade and Conflict")
print("="*80)
# Scenario: Effect of trade on conflict (trade is endogenous)
print("\nScenario: Does trade reduce conflict?")
print("Problem: Trade is endogenous (reverse causality, omitted variables)")
print("Instrument: Geographic distance to major trade routes\n")
np.random.seed(42)
n = 300
# Instrument: distance (exogenous)
distance = np.random.uniform(100, 1000, n)
# Unobserved confounders
unobserved = np.random.randn(n)
# Trade (endogenous): affected by distance and confounders
trade = 50 - 0.03 * distance + 2.0 * unobserved + np.random.randn(n) * 5
# Conflict: true effect of trade = -0.15, but also affected by confounders
true_effect = -0.15
conflict = 10 + true_effect * trade - 1.5 * unobserved + np.random.randn(n) * 2
# Estimate with IV
iv = InstrumentalVariables()
result = iv.estimate_2sls(
outcome=conflict,
endogenous=trade,
instrument=distance
)
print("Instrumental Variables (2SLS) Results:")
print(f"\n First stage F-statistic: {result.first_stage_f:.2f}")
if result.weak_instrument:
print(" β Warning: Weak instrument (F < 10)")
else:
print(" β Strong instrument (F > 10)")
print(f"\n OLS estimate (biased): {result.beta_ols[0]:.4f}")
print(f" IV estimate (consistent): {result.beta_iv[0]:.4f}")
print(f" IV standard error: {result.se_iv[0]:.4f}")
print(f"\n True causal effect: {true_effect:.4f}")
# Hausman test (informal)
if abs(result.beta_ols[0] - result.beta_iv[0]) > 0.05:
print("\n β OLS and IV differ substantially β endogeneity present")
print(" IV corrects for bias!")
else:
print("\n OLS and IV similar β endogeneity may be small")
print("\nβ IV isolates causal effect using exogenous variation!")
def demo_dynamic_factor_model():
"""Demonstrate Dynamic Factor Model for nowcasting."""
print("\n" + "="*80)
print("7. Dynamic Factor Model (DFM) - High-Dimensional Nowcasting")
print("="*80)
# Scenario: Nowcast geopolitical tension from many indicators
print("\nScenario: Nowcast regional tension from 50 economic/political indicators")
print("DFM extracts common latent factors driving all indicators\n")
np.random.seed(42)
T = 200
n_indicators = 50
n_factors = 3
# True factors (latent tensions)
true_factors = np.zeros((T, n_factors))
for k in range(n_factors):
# AR(1) dynamics
for t in range(1, T):
true_factors[t, k] = 0.8 * true_factors[t-1, k] + np.random.randn() * 0.5
# Factor loadings (how indicators load on factors)
true_loadings = np.random.randn(n_indicators, n_factors)
# Observed indicators = factors * loadings + idiosyncratic noise
data = true_factors @ true_loadings.T + np.random.randn(T, n_indicators) * 0.5
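# Added intuition check (NumPy SVD only): the static core of a DFM is closely
# related to principal components, so the leading singular values of the
# demeaned panel indicate how much co-movement a three-factor model can
# plausibly capture before any dynamics are imposed.
demeaned = data - data.mean(axis=0)
singular_values = np.linalg.svd(demeaned, compute_uv=False)
pc_share = np.sum(singular_values[:n_factors] ** 2) / np.sum(singular_values ** 2)
print(f"Variance share of the top {n_factors} principal components: {pc_share:.1%}")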
# Fit DFM
dfm = DynamicFactorModel(n_factors=3, n_lags=1)
model = dfm.fit(data)
print(f"Dynamic Factor Model Results:")
print(f"\n Number of indicators: {n_indicators}")
print(f" Number of factors: {n_factors}")
print(f" Explained variance: {model['explained_variance_ratio']:.1%}")
# Extracted factors
factors = model['factors']
print(f"\n Extracted factor dimensions: {factors.shape}")
print(f" Factor 1 final value: {factors[-1, 0]:.3f}")
print(f" Factor 2 final value: {factors[-1, 1]:.3f}")
print(f" Factor 3 final value: {factors[-1, 2]:.3f}")
# Forecast
forecast = dfm.forecast(model, steps=10)
print(f"\n 10-step ahead forecast dimensions: {forecast.shape}")
print(f" Average forecasted indicator value: {np.mean(forecast[-1]):.3f}")
# Correlation with true factors
corr_0 = np.corrcoef(true_factors[:, 0], factors[:, 0])[0, 1]
print(f"\n Factor recovery (correlation with true): {abs(corr_0):.3f}")
print("\nβ DFM reduces dimensionality while preserving information!")
def main():
"""Run all demonstrations of final features."""
print("=" * 80)
print("GeoBotv1 - COMPLETE FRAMEWORK DEMONSTRATION")
print("=" * 80)
print("\nThis example showcases the final components that complete GeoBotv1:")
print("β’ Vector Autoregression (VAR/SVAR/DFM)")
print("β’ Hawkes Processes for conflict contagion")
print("β’ Quasi-Experimental Causal Inference")
print(" - Synthetic Control Method")
print(" - Difference-in-Differences")
print(" - Regression Discontinuity Design")
print(" - Instrumental Variables")
# Run all demonstrations
demo_var_model()
demo_hawkes_process()
demo_synthetic_control()
demo_difference_in_differences()
demo_regression_discontinuity()
demo_instrumental_variables()
demo_dynamic_factor_model()
print("\n" + "=" * 80)
print("GeoBotv1 Framework is NOW 100% COMPLETE!")
print("=" * 80)
print("\nπ All Research-Grade Mathematical Components Implemented:")
print("\nπ CORE FRAMEWORKS:")
print(" β Optimal Transport (Wasserstein, Kantorovich, Sinkhorn)")
print(" β Causal Inference (DAGs, SCMs, Do-Calculus)")
print(" β Bayesian Inference (MCMC, Particle Filters, VI)")
print(" β Stochastic Processes (SDEs, Jump-Diffusion)")
print(" β Time-Series Models (Kalman, HMM, VAR, Hawkes)")
print(" β Quasi-Experimental Methods (SCM, DiD, RDD, IV)")
print(" β Machine Learning (GNNs, Risk Scoring, Embeddings)")
print("\nπ SPECIALIZED CAPABILITIES:")
print(" β Multi-country interdependency modeling (VAR)")
print(" β Conflict contagion and escalation (Hawkes)")
print(" β Policy counterfactuals (Synthetic Control)")
print(" β Regime change effects (Difference-in-Differences)")
print(" β Election outcomes impact (Regression Discontinuity)")
print(" β Trade-conflict nexus (Instrumental Variables)")
print(" β High-dimensional nowcasting (Dynamic Factor Models)")
print("\n㪠MATHEMATICAL RIGOR:")
print(" β Measure-theoretic probability foundations")
print(" β Continuous-time dynamics (SDEs)")
print(" β Causal identification strategies")
print(" β Structural econometric methods")
print(" β Point process theory")
print(" β Optimal transport geometry")
print("\nπ‘ GeoBotv1 is ready for production geopolitical forecasting!")
print("=" * 80 + "\n")
if __name__ == "__main__":
main()