# File size: 4,616 Bytes
import numpy as np
import pandas as pd
import streamlit as st
import joblib
from pathlib import Path
# -------------------------------
# Artifact locations
# -------------------------------
# Both pickles are looked up in the directory that contains this script.
BASE_DIR = Path(__file__).resolve().parent
MODEL_PATH = BASE_DIR.joinpath('lr_final_model.pkl')
FEATURE_NAMES_PATH = BASE_DIR.joinpath('feature_names.pkl')

# -------------------------------
# Page config
# -------------------------------
st.set_page_config(
    layout='centered',
    page_icon='🌧️',
    page_title='Rainfall Probability Predictor (LogReg)',
)

# Page header: title followed by a one-line description of the app.
st.title('🌧️ Rainfall Probability Predictor')
st.write(
    'Predicts the probability of rainfall (0–1) using a Logistic Regression model trained on weather features.'
)
@st.cache_resource
def load_artifacts():
    """Load the trained model and the ordered feature-name list from disk.

    The function is wrapped in ``st.cache_resource``; both artifacts are read
    with ``joblib.load`` from ``MODEL_PATH`` and ``FEATURE_NAMES_PATH``.

    Returns:
        A ``(model, feature_names)`` tuple where ``feature_names`` is always a
        plain, non-empty ``list``.

    Raises:
        FileNotFoundError: If either artifact file does not exist.
        ValueError: If the feature-names artifact is not a non-empty
            list-like of column names.
    """
    if not MODEL_PATH.exists():
        raise FileNotFoundError(
            f'Model not found: {MODEL_PATH.name}. Put it in the repo root (same folder as app.py).'
        )
    if not FEATURE_NAMES_PATH.exists():
        raise FileNotFoundError(
            f'Feature names not found: {FEATURE_NAMES_PATH.name}. Put it in the repo root (same folder as app.py).'
        )

    # NOTE: joblib.load unpickles arbitrary objects; only ship artifacts that
    # were produced by a trusted training run.
    model = joblib.load(MODEL_PATH)
    feature_names = joblib.load(FEATURE_NAMES_PATH)

    # Column names are often dumped straight from `X.columns` (a pandas Index)
    # or as a numpy array / tuple. Normalize those to a plain list instead of
    # rejecting an otherwise valid artifact.
    if hasattr(feature_names, 'tolist'):
        feature_names = feature_names.tolist()
    elif isinstance(feature_names, tuple):
        feature_names = list(feature_names)

    if not isinstance(feature_names, list) or not feature_names:
        raise ValueError('feature_names.pkl must contain a non-empty list of column names.')
    return model, feature_names
def add_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with the derived feature columns.

    The input frame is left untouched. Six columns are appended, in this
    order: ``temp_range``, ``humidity_gap``, ``sunshine_ratio``,
    ``wind_energy``, ``sin_day`` and ``cos_day``.

    Args:
        df: Frame holding the raw columns ``maxtemp``, ``mintemp``,
            ``humidity``, ``dewpoint``, ``sunshine``, ``cloud``,
            ``windspeed``, ``winddirection`` and ``day``.

    Returns:
        A new DataFrame with the original columns plus the derived ones.
    """
    # Position of the day within a 365-day cycle, expressed as an angle.
    day_angle = 2 * np.pi * df['day'] / 365

    return df.assign(
        # Spread between the maximum and minimum temperature.
        temp_range=df['maxtemp'] - df['mintemp'],
        # Difference between the humidity and dew point readings.
        humidity_gap=df['humidity'] - df['dewpoint'],
        # Sunshine scaled by cloud cover; the +1 avoids dividing by a zero cloud value.
        sunshine_ratio=df['sunshine'] / (df['cloud'] + 1),
        # Plain product of wind speed and wind direction.
        wind_energy=df['windspeed'] * df['winddirection'],
        # Cyclical encoding of the day.
        sin_day=np.sin(day_angle),
        cos_day=np.cos(day_angle),
    )
# Fetch the model and the ordered list of columns it expects.
model, feature_names = load_artifacts()

st.subheader('Input features')


def _float_input(label, default, step=0.1):
    """Render a numeric input without bounds and return its current value."""
    return st.number_input(label, value=default, step=step)


# NOTE: only `day` is range-limited; every other input accepts any number.
# Tighter ranges could be derived from the training data's summary statistics.
left_col, right_col = st.columns(2)
with left_col:
    day = st.number_input('day (1–365)', min_value=1, max_value=365, value=100, step=1)
    pressure = _float_input('pressure', 1013.0)
    maxtemp = _float_input('maxtemp', 20.0)
    temperature = _float_input('temperature', 15.0)
    mintemp = _float_input('mintemp', 10.0)
with right_col:
    dewpoint = _float_input('dewpoint', 8.0)
    humidity = _float_input('humidity', 70.0)
    cloud = _float_input('cloud', 50.0, step=1.0)
    sunshine = _float_input('sunshine', 5.0)
    windspeed = _float_input('windspeed', 10.0)
    winddirection = _float_input('winddirection', 180.0, step=1.0)

# Single-row frame of the raw inputs, immediately extended with the derived
# columns. The 'temparature' spelling is kept as-is: presumably it mirrors the
# column name used at training time - verify before "correcting" it.
raw_row = {
    'day': float(day),
    'pressure': float(pressure),
    'maxtemp': float(maxtemp),
    'temparature': float(temperature),
    'mintemp': float(mintemp),
    'dewpoint': float(dewpoint),
    'humidity': float(humidity),
    'cloud': float(cloud),
    'sunshine': float(sunshine),
    'windspeed': float(windspeed),
    'winddirection': float(winddirection),
}
input_df = add_features(pd.DataFrame([raw_row]))

# Compare the columns we built against the columns the model expects.
missing_cols = [name for name in feature_names if name not in input_df.columns]
extra_cols = [name for name in input_df.columns if name not in feature_names]
if missing_cols:
    # Nothing sensible can be predicted without every expected column.
    st.error(f'Missing required feature columns: {missing_cols}')
    st.stop()

# Select exactly the expected columns, in the expected order.
X = input_df[feature_names].copy()

st.divider()

if st.button('Predict rainfall probability'):
    try:
        class_probabilities = model.predict_proba(X)
        # Column 1 of the first (only) row is reported as the rain probability.
        proba = float(class_probabilities[:, 1][0])

        st.metric('Rainfall probability', f'{proba:.3f}', delta=None)
        clamped = min(max(proba, 0.0), 1.0)
        st.progress(clamped)

        # Walk the thresholds from highest to lowest; fall back to "low".
        verdicts = (
            (0.7, st.success, 'High chance of rainfall.'),
            (0.4, st.warning, 'Medium chance of rainfall.'),
        )
        for cutoff, notify, message in verdicts:
            if proba >= cutoff:
                notify(message)
                break
        else:
            st.info('Low chance of rainfall.')

        with st.expander('Show model input (debug)'):
            st.write('Used feature columns (ordered):')
            st.write(feature_names)
            st.dataframe(X)
            if extra_cols:
                st.caption(f'Note: These columns were ignored (not in feature_names): {extra_cols}')
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing the page.
        st.error(f'Prediction failed: {e}')