"""Compute reliability indices (SAIFI, SAIDI, CAIDI, MAIFI) from outage event CSV.
Usage (programmatic):
from scripts.compute_reliability import compute_reliability
summary = compute_reliability('data/data_1.csv', total_customers=500000)
Usage (CLI):
python scripts/compute_reliability.py --input data/data_1.csv --total-customers 500000
Assumptions & mapping (from inspected CSV):
- Outage start: `OutageDateTime`
- Outage end: prefer `CloseEventDateTime`, else `LastRestoDateTime`, else `FirstRestoDateTime`
- Customers affected: prefer `AffectedCustomer` column; else sum `AffectedCustomer1..5`; else `AllStepCusXTime` or `AllStepCusXTime1..5` fallback.
- Planned outages: rows with `EventType` containing 'แผน' (e.g., 'แผนดับไฟ') are considered planned and can be excluded by default.
- Date format is day-first like '10-01-2025 10:28:00'.
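
Index definitions (IEEE 1366 style, as computed below):
- SAIFI = (total customers interrupted) / (total customers served)
- SAIDI = (total customer-minutes of interruption) / (total customers served)
- CAIDI = SAIDI / SAIFI (average minutes of interruption per interrupted customer)
- MAIFI = (customers with interruptions shorter than `momentary_threshold_min`)
          / (total customers served)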
Outputs are written to `outputs/events_cleaned.csv`, `outputs/reliability_overall.csv`,
and `outputs/reliability_by_group.csv`.
"""
from __future__ import annotations

import argparse
from pathlib import Path
from typing import Dict, Optional

import numpy as np
import pandas as pd

DATE_COLS = ['OutageDateTime', 'FirstRestoDateTime', 'LastRestoDateTime', 'CreateEventDateTime', 'CloseEventDateTime']


def parse_dates(df: pd.DataFrame) -> pd.DataFrame:
    for c in DATE_COLS:
        if c in df.columns:
            # many dates are in format dd-mm-YYYY HH:MM:SS
            df[c] = pd.to_datetime(df[c], dayfirst=True, errors='coerce')
    return df
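
# Note: with dayfirst=True, '10-01-2025 10:28:00' parses as 10 January 2025
# (not 1 October); unparseable values are coerced to NaT.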


def coalesce_end_time(row: pd.Series) -> pd.Timestamp | None:
    # Prefer the close time, then last/first restoration times. CreateEventDateTime
    # is a last resort and may predate the true end (negative durations are clamped later).
    for c in ('CloseEventDateTime', 'LastRestoDateTime', 'FirstRestoDateTime', 'CreateEventDateTime'):
        if c in row and pd.notna(row[c]):
            return row[c]
    return pd.NaT


def estimate_customers(row: pd.Series) -> float:
    """Best-effort customer count for one event, using the fallbacks in the module docstring."""
    def to_num(x):
        try:
            if pd.isna(x) or x == '':
                return np.nan
            return float(x)
        except Exception:
            return np.nan

    cols = row.index
    # 1) Prefer AffectedCustomer if present and numeric
    if 'AffectedCustomer' in cols:
        v = to_num(row['AffectedCustomer'])
        if not np.isnan(v):
            return v
    # 2) Sum AffectedCustomer1..5
    acs = [to_num(row[f'AffectedCustomer{i}']) for i in range(1, 6) if f'AffectedCustomer{i}' in cols]
    acs = [x for x in acs if not np.isnan(x)]
    if acs:
        return float(sum(acs))
    # 3) Fall back to AllStepCusXTime, then AllStepCusXTime1..5
    if 'AllStepCusXTime' in cols:
        v = to_num(row['AllStepCusXTime'])
        if not np.isnan(v):
            return v
    asts = [to_num(row[f'AllStepCusXTime{i}']) for i in range(1, 6) if f'AllStepCusXTime{i}' in cols]
    asts = [x for x in asts if not np.isnan(x)]
    if asts:
        return float(sum(asts))
    # Capacity(kVA) and Load(MW) are not customer counts, so do not fall back to them.
    return np.nan
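
# Illustrative behaviour (hypothetical rows): a row with AffectedCustomer='120'
# yields 120.0; a row with only AffectedCustomer1='30' and AffectedCustomer2='15'
# yields 45.0; a row with none of the recognised columns yields NaN.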


def flag_planned(event_type: Optional[str]) -> bool:
    if pd.isna(event_type):
        return False
    s = str(event_type)
    # In this dataset planned outages are marked with the Thai word 'แผน' ('plan'),
    # e.g. 'แผนดับไฟ' (planned power cut); everything else is treated as unplanned.
    return 'แผน' in s
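
# e.g. flag_planned('แผนดับไฟ') -> True; flag_planned('Fault') -> False
# (the second value is hypothetical).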


def compute_reliability(
    input_csv: str | Path,
    total_customers: Optional[float] = None,
    customers_map: Optional[Dict[str, float]] = None,
    exclude_planned: bool = True,
    momentary_threshold_min: float = 1.0,
    groupby_cols: list[str] | None = None,
    out_dir: str | Path | None = 'outputs',
) -> Dict[str, pd.DataFrame]:
    """Read the outage CSV and compute reliability indices.

    Returns a dict of DataFrames: 'overall', 'by_group', and the cleaned 'raw' events.
    If `out_dir` is None, results are returned but no CSVs are written.
    """
    input_csv = Path(input_csv)
    if out_dir is not None:
        out_dir = Path(out_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
    df = pd.read_csv(input_csv, dtype=str)
    df = parse_dates(df)

    # Derive start, end, duration (minutes), customers affected, and planned flag.
    df['OutageStart'] = df.get('OutageDateTime')
    df['OutageEnd'] = df.apply(coalesce_end_time, axis=1)
    df['DurationMin'] = (pd.to_datetime(df['OutageEnd']) - pd.to_datetime(df['OutageStart'])).dt.total_seconds() / 60.0
    df['CustomersAffected'] = df.apply(estimate_customers, axis=1)
    df['IsPlanned'] = df['EventType'].apply(flag_planned) if 'EventType' in df.columns else False

    if exclude_planned:
        df_work = df[~df['IsPlanned']].copy()
    else:
        df_work = df.copy()

    # Treat missing durations as 0 and clamp negatives (which can arise from the
    # CreateEventDateTime fallback) to 0; coerce customer counts to numeric.
    df_work['DurationMin'] = df_work['DurationMin'].fillna(0)
    df_work.loc[df_work['DurationMin'] < 0, 'DurationMin'] = 0
    df_work['CustomersAffected'] = pd.to_numeric(df_work['CustomersAffected'], errors='coerce').fillna(0)

    if groupby_cols is None:
        groupby_cols = []
    # Helper: compute the indices for a set of events given a customer base.
    def compute_from_df(dfall: pd.DataFrame, cust_total: float) -> Dict[str, float]:
        total_interruptions = dfall['CustomersAffected'].sum()
        total_customer_minutes = (dfall['CustomersAffected'] * dfall['DurationMin']).sum()
        # Momentary interruptions: durations below the threshold. Note that SAIFI as
        # computed here counts all interruptions including momentary ones, whereas
        # strict IEEE 1366 SAIFI counts only sustained interruptions.
        momentary_interruptions = dfall.loc[dfall['DurationMin'] < momentary_threshold_min, 'CustomersAffected'].sum()
        saifi = total_interruptions / cust_total if cust_total and cust_total > 0 else np.nan
        saidi = total_customer_minutes / cust_total if cust_total and cust_total > 0 else np.nan
        caidi = (saidi / saifi) if (saifi and saifi > 0) else np.nan
        maifi = momentary_interruptions / cust_total if cust_total and cust_total > 0 else np.nan
        return {
            'TotalInterruptions': total_interruptions,
            'TotalCustomerMinutes': total_customer_minutes,
            'MomentaryInterruptions': momentary_interruptions,
            'SAIFI': saifi,
            'SAIDI': saidi,
            'CAIDI': caidi,
            'MAIFI': maifi,
        }
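
    # Worked example (hypothetical numbers): 10,000 customer interruptions and
    # 600,000 customer-minutes over a base of 500,000 customers give
    # SAIFI = 0.02, SAIDI = 1.2 min, and CAIDI = 1.2 / 0.02 = 60 min.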
    results: Dict[str, pd.DataFrame] = {}
    if customers_map is not None:
        # customers_map keys should match the grouping values (e.g. Feeder or
        # AffectedAreaID). For the overall figure, use total_customers if given,
        # otherwise the sum of the map's values.
        total_customers_map_sum = sum(customers_map.values())
        overall = compute_from_df(df_work, total_customers_map_sum if total_customers is None else total_customers)
        results['overall'] = pd.DataFrame([overall])
        # Per-group breakdown: infer a grouping column if none was provided.
        if not groupby_cols:
            if 'Feeder' in df_work.columns:
                groupby_cols = ['Feeder']
            elif 'AffectedAreaID' in df_work.columns:
                groupby_cols = ['AffectedAreaID']
        if groupby_cols:
            rows = []
            for key, sub in df_work.groupby(groupby_cols):
                # key is a tuple when grouping by multiple columns
                keyname = key if isinstance(key, str) else '_'.join(map(str, key))
                cust = customers_map.get(keyname, np.nan)
                metrics = compute_from_df(sub, cust)
                metrics.update({'Group': keyname})
                rows.append(metrics)
            results['by_group'] = pd.DataFrame(rows)
        else:
            results['by_group'] = pd.DataFrame()
    else:
        # customers_map not provided: a system-wide total is required.
        if total_customers is None:
            raise ValueError('Either total_customers or customers_map must be provided to compute per-customer indices')
        overall = compute_from_df(df_work, float(total_customers))
        results['overall'] = pd.DataFrame([overall])
        # Per-group breakdowns: without per-group customer counts we can only report
        # raw totals (interruptions, customer-minutes, event counts), not indices
        # normalised per customer; supply customers_map for those.
        if groupby_cols:
            rows = []
            for key, sub in df_work.groupby(groupby_cols):
                keyname = key if isinstance(key, str) else '_'.join(map(str, key))
                rows.append({
                    'Group': keyname,
                    'TotalInterruptions': sub['CustomersAffected'].sum(),
                    'TotalCustomerMinutes': (sub['CustomersAffected'] * sub['DurationMin']).sum(),
                    'Events': len(sub),
                })
            results['by_group'] = pd.DataFrame(rows)
        else:
            results['by_group'] = pd.DataFrame()
    # Attach the cleaned events and write CSVs (skipped when out_dir is None).
    results['raw'] = df_work
    if out_dir is not None:
        results['raw'].to_csv(out_dir / 'events_cleaned.csv', index=False)
        results['overall'].to_csv(out_dir / 'reliability_overall.csv', index=False)
        results['by_group'].to_csv(out_dir / 'reliability_by_group.csv', index=False)
    return results
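

# Example programmatic use with a per-group customer map (feeder names and counts
# are hypothetical):
#
#     res = compute_reliability('data/data_1.csv',
#                               customers_map={'FDR-01': 12000, 'FDR-02': 8500},
#                               groupby_cols=['Feeder'])
#     print(res['by_group'])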


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', '-i', required=True, help='Input CSV file')
    parser.add_argument('--total-customers', type=float,
                        help='Total customers served in the system (required if no customers map)')
    parser.add_argument('--exclude-planned', action=argparse.BooleanOptionalAction, default=True,
                        help='Exclude planned outages (default: True; pass --no-exclude-planned to include them)')
    parser.add_argument('--momentary-threshold-min', type=float, default=1.0,
                        help='Threshold in minutes below which an interruption counts as momentary')
    parser.add_argument('--groupby', nargs='*', default=['Feeder'],
                        help='Columns to group by for breakdown (default: Feeder)')
    args = parser.parse_args()
    compute_reliability(args.input, total_customers=args.total_customers,
                        exclude_planned=args.exclude_planned,
                        momentary_threshold_min=args.momentary_threshold_min,
                        groupby_cols=args.groupby)
    print('Wrote outputs to outputs/ (events_cleaned.csv, reliability_overall.csv, reliability_by_group.csv)')