File size: 5,796 Bytes
484e3bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
"""
Settings and configuration for GeoBotv1
"""
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
import yaml
from pathlib import Path
@dataclass
class Settings:
    """
    Global settings for GeoBotv1.

    Settings are stored as flat attributes on the instance. The dictionary
    (and YAML) representation groups them into the sections ``simulation``,
    ``data_ingestion``, ``ml``, ``bayesian``, ``causal``, ``directories`` and
    ``logging``, plus a free-form ``custom`` mapping.
    """

    # Simulation settings
    default_n_simulations: int = 1000
    default_time_horizon: int = 100
    random_seed: Optional[int] = None

    # Data ingestion settings
    pdf_extraction_method: str = 'auto'
    web_scraping_timeout: int = 30
    article_extraction_method: str = 'auto'

    # ML settings
    risk_scoring_method: str = 'gradient_boosting'
    embedding_model: str = 'sentence-transformers/all-MiniLM-L6-v2'

    # Bayesian inference settings
    bayesian_method: str = 'grid'
    n_mcmc_samples: int = 10000

    # Causal inference settings
    causal_discovery_method: str = 'pc'
    causal_discovery_alpha: float = 0.05

    # Data directories
    data_dir: str = 'data'
    cache_dir: str = '.cache'
    output_dir: str = 'output'

    # Logging
    log_level: str = 'INFO'
    log_file: Optional[str] = None

    # Custom settings
    custom: Dict[str, Any] = field(default_factory=dict)

    # Single source of truth for the sectioned layout:
    # section name -> {key inside the section: attribute name on the instance}.
    # Deliberately NOT annotated: dataclass only turns annotated names into
    # fields, so this stays a plain class attribute shared by to_dict/from_dict.
    _SECTIONS = {
        'simulation': {
            'default_n_simulations': 'default_n_simulations',
            'default_time_horizon': 'default_time_horizon',
            'random_seed': 'random_seed',
        },
        'data_ingestion': {
            'pdf_extraction_method': 'pdf_extraction_method',
            'web_scraping_timeout': 'web_scraping_timeout',
            'article_extraction_method': 'article_extraction_method',
        },
        'ml': {
            'risk_scoring_method': 'risk_scoring_method',
            'embedding_model': 'embedding_model',
        },
        'bayesian': {
            'method': 'bayesian_method',
            'n_mcmc_samples': 'n_mcmc_samples',
        },
        'causal': {
            'discovery_method': 'causal_discovery_method',
            'discovery_alpha': 'causal_discovery_alpha',
        },
        'directories': {
            'data_dir': 'data_dir',
            'cache_dir': 'cache_dir',
            'output_dir': 'output_dir',
        },
        'logging': {
            'log_level': 'log_level',
            'log_file': 'log_file',
        },
    }

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert settings to a sectioned dictionary.

        Returns
        -------
        dict
            One nested dict per section (see ``_SECTIONS``) followed by a
            ``'custom'`` entry holding ``self.custom`` itself (not a copy).
        """
        result: Dict[str, Any] = {
            section: {key: getattr(self, attr) for key, attr in key_to_attr.items()}
            for section, key_to_attr in self._SECTIONS.items()
        }
        result['custom'] = self.custom
        return result

    @classmethod
    def from_dict(cls, data: Optional[Dict[str, Any]]) -> 'Settings':
        """
        Load settings from a sectioned dictionary.

        Missing sections and missing keys keep the dataclass defaults. A
        section (or ``custom``) whose value is ``None`` is treated as absent,
        which is what an empty ``section:`` entry in YAML parses to.

        Parameters
        ----------
        data : dict or None
            Dictionary in the layout produced by ``to_dict``. ``None``
            (the result of parsing an empty YAML document) yields defaults.

        Returns
        -------
        Settings
            New instance populated from ``data``.

        Raises
        ------
        TypeError
            If ``data`` or one of its known sections is neither ``None``
            nor a mapping.
        """
        # Local import so the class does not depend on a new file-level import.
        from collections.abc import Mapping

        settings = cls()
        if data is None:
            return settings
        if not isinstance(data, Mapping):
            raise TypeError(
                f"settings data must be a mapping, got {type(data).__name__}"
            )

        for section, key_to_attr in cls._SECTIONS.items():
            values = data.get(section)
            if values is None:
                continue
            if not isinstance(values, Mapping):
                raise TypeError(
                    f"settings section {section!r} must be a mapping, "
                    f"got {type(values).__name__}"
                )
            for key, attr in key_to_attr.items():
                # Only override when the key is actually present, so the
                # dataclass default is the one and only fallback value.
                if key in values:
                    setattr(settings, attr, values[key])

        custom = data.get('custom')
        if custom is not None:
            settings.custom = custom
        return settings

    def save(self, path: str) -> None:
        """
        Save settings to a YAML file.

        Parameters
        ----------
        path : str
            Destination file; it is overwritten if it exists.
        """
        # NOTE(review): yaml.dump can emit Python-specific tags for arbitrary
        # objects stored in `custom`, which yaml.safe_load in `load` rejects.
        with open(path, 'w', encoding='utf-8') as f:
            yaml.dump(self.to_dict(), f, default_flow_style=False)

    @classmethod
    def load(cls, path: str) -> 'Settings':
        """
        Load settings from a YAML file.

        Parameters
        ----------
        path : str
            YAML file in the layout written by ``save``. An empty file
            yields default settings.

        Returns
        -------
        Settings
            Instance built via ``from_dict`` from the parsed file.
        """
        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        return cls.from_dict(data)
# Module-level holder for the shared Settings instance. It starts as None;
# get_settings() fills it with a default Settings() when it is None, and
# update_settings() rebinds it to the instance it is given.
_global_settings: Optional[Settings] = None
def get_settings() -> Settings:
    """
    Return the shared module-level settings object.

    If no settings object is currently stored, a default ``Settings()`` is
    created, stored, and returned.

    Returns
    -------
    Settings
        The instance held in the module-level ``_global_settings``.
    """
    global _global_settings
    # Guard clause: reuse the stored instance whenever one exists.
    if _global_settings is not None:
        return _global_settings
    _global_settings = Settings()
    return _global_settings
def update_settings(settings: Settings) -> None:
    """
    Replace the shared module-level settings object.

    Parameters
    ----------
    settings : Settings
        Instance that subsequent ``get_settings()`` calls will return.
    """
    global _global_settings
    # Plain rebinding: the given object is stored as-is, not copied.
    _global_settings = settings
def load_settings_from_file(path: str) -> None:
    """
    Read settings from a file and install them as the global settings.

    Parameters
    ----------
    path : str
        Path to the settings file, passed to ``Settings.load``.
    """
    update_settings(Settings.load(path))
|