File size: 4,616 Bytes
8247cec
 
 
fe53f63
 
8247cec
fe53f63
 
 
 
 
 
 
 
 
 
 
 
 
6fd38f2
fe53f63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3547053
fe53f63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import numpy as np
import pandas as pd
import streamlit as st
import joblib
from pathlib import Path

# -------------------------------
# Artifact locations
# -------------------------------
# Both pickles are looked up next to this script (not relative to the current
# working directory), so the app finds them wherever it is launched from.
BASE_DIR = Path(__file__).resolve().parent
MODEL_PATH = BASE_DIR.joinpath('lr_final_model.pkl')
FEATURE_NAMES_PATH = BASE_DIR.joinpath('feature_names.pkl')

# -------------------------------
# Page config
# -------------------------------
# Kept as the first Streamlit call in the script.
st.set_page_config(
    layout='centered',
    page_icon='🌧️',
    page_title='Rainfall Probability Predictor (LogReg)',
)

# Page heading and a one-line description of what the app outputs.
st.title('🌧️ Rainfall Probability Predictor')
st.write('Predicts the probability of rainfall (0–1) using a Logistic Regression model trained on weather features.')


@st.cache_resource
def load_artifacts():
    """Load the fitted model and the ordered feature-name list from disk.

    Both artifacts are read with ``joblib.load`` from ``MODEL_PATH`` and
    ``FEATURE_NAMES_PATH``. The function is decorated with
    ``st.cache_resource``, so Streamlit is expected to reuse the loaded
    objects across reruns instead of re-reading the files.

    Returns:
        A ``(model, feature_names)`` tuple, where ``feature_names`` is a
        non-empty list.

    Raises:
        FileNotFoundError: If either artifact file does not exist.
        ValueError: If the feature-names artifact is not a non-empty list.
    """
    # Check for both files up front so the user gets an actionable message
    # rather than a low-level error from the unpickler.
    if not MODEL_PATH.exists():
        model_hint = f'Model not found: {MODEL_PATH.name}. Put it in the repo root (same folder as app.py).'
        raise FileNotFoundError(model_hint)

    if not FEATURE_NAMES_PATH.exists():
        names_hint = f'Feature names not found: {FEATURE_NAMES_PATH.name}. Put it in the repo root (same folder as app.py).'
        raise FileNotFoundError(names_hint)

    # NOTE(review): joblib.load unpickles arbitrary objects; only ship
    # artifacts from a trusted source alongside the app.
    estimator = joblib.load(MODEL_PATH)
    column_names = joblib.load(FEATURE_NAMES_PATH)

    # The rest of the script indexes a DataFrame with this object, so it has
    # to be a real, non-empty list.
    names_ok = isinstance(column_names, list) and len(column_names) > 0
    if not names_ok:
        raise ValueError('feature_names.pkl must contain a non-empty list of column names.')

    return estimator, column_names


def add_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with the engineered weather columns.

    The input frame is left unmodified. It must provide the columns
    ``maxtemp``, ``mintemp``, ``humidity``, ``dewpoint``, ``sunshine``,
    ``cloud``, ``windspeed``, ``winddirection`` and ``day``; pandas raises
    ``KeyError`` if one is absent.

    Added columns, in this order:
        temp_range: ``maxtemp - mintemp``.
        humidity_gap: ``humidity - dewpoint``.
        sunshine_ratio: ``sunshine / (cloud + 1)``; the ``+ 1`` avoids
            dividing by zero when ``cloud`` is 0.
        wind_energy: ``windspeed * winddirection``.
        sin_day, cos_day: sine and cosine of the day mapped onto a
            365-step circle.
    """
    # Angle of the day on a yearly circle, shared by both seasonal columns.
    day_angle = 2 * np.pi * df['day'] / 365

    # ``assign`` works on a copy and appends new columns in keyword order.
    return df.assign(
        temp_range=df['maxtemp'] - df['mintemp'],
        humidity_gap=df['humidity'] - df['dewpoint'],
        sunshine_ratio=df['sunshine'] / (df['cloud'] + 1),
        wind_energy=df['windspeed'] * df['winddirection'],
        sin_day=np.sin(day_angle),
        cos_day=np.cos(day_angle),
    )


# Load the model and its expected feature order. A missing or malformed
# artifact is a deployment problem with an actionable message, so show that
# message in the page and halt this run instead of surfacing a raw traceback.
# This mirrors how missing feature columns are reported further down.
try:
    model, feature_names = load_artifacts()
except (FileNotFoundError, ValueError) as exc:
    st.error(f'Could not load model artifacts: {exc}')
    st.stop()

st.subheader('Input features')

# Widget bounds are loose placeholders: only `day` is range-limited. They can
# be tightened later from summary statistics of the training data.
left_col, right_col = st.columns(2)

# Left column: day of year, pressure and the three temperature readings.
day = left_col.number_input('day (1–365)', min_value=1, max_value=365, value=100, step=1)
pressure = left_col.number_input('pressure', value=1013.0, step=0.1)
maxtemp = left_col.number_input('maxtemp', value=20.0, step=0.1)
temperature = left_col.number_input('temperature', value=15.0, step=0.1)
mintemp = left_col.number_input('mintemp', value=10.0, step=0.1)

# Right column: moisture, sky and wind readings.
dewpoint = right_col.number_input('dewpoint', value=8.0, step=0.1)
humidity = right_col.number_input('humidity', value=70.0, step=0.1)
cloud = right_col.number_input('cloud', value=50.0, step=1.0)
sunshine = right_col.number_input('sunshine', value=5.0, step=0.1)
windspeed = right_col.number_input('windspeed', value=10.0, step=0.1)
winddirection = right_col.number_input('winddirection', value=180.0, step=1.0)

# Raw widget values keyed by the ORIGINAL base column names.
# NOTE(review): the 'temparature' spelling is kept as-is; presumably it matches
# the column name used at training time — verify against feature_names.pkl
# before "fixing" it.
base_values = {
    'day': day,
    'pressure': pressure,
    'maxtemp': maxtemp,
    'temparature': temperature,
    'mintemp': mintemp,
    'dewpoint': dewpoint,
    'humidity': humidity,
    'cloud': cloud,
    'sunshine': sunshine,
    'windspeed': windspeed,
    'winddirection': winddirection,
}

# One-row frame with every value as float, extended with the engineered
# columns (these must match whatever was done at training time).
input_df = add_features(
    pd.DataFrame([{column: float(value) for column, value in base_values.items()}])
)

# Compare what the model expects with what was built, in both directions.
built_columns = input_df.columns
missing_cols = [name for name in feature_names if name not in built_columns]
extra_cols = [name for name in built_columns if name not in feature_names]

# Without every expected column there is nothing sensible to predict on.
if missing_cols:
    st.error(f'Missing required feature columns: {missing_cols}')
    st.stop()

# Select exactly the expected columns, in the order the model was given.
X = input_df[feature_names].copy()

st.divider()

if st.button('Predict rainfall probability'):
    try:
        # Column 1 of predict_proba is read as the probability of rain for the
        # single input row.
        class_probabilities = model.predict_proba(X)
        proba = float(class_probabilities[:, 1][0])

        st.metric('Rainfall probability', f'{proba:.3f}', delta=None)

        # Clamp into [0, 1] before drawing the progress bar.
        bar_fraction = min(max(proba, 0.0), 1.0)
        st.progress(bar_fraction)

        # Bands are checked from highest to lowest; the first lower bound that
        # is met decides the banner, and anything below all of them is "low".
        verdict_bands = (
            (0.7, st.success, 'High chance of rainfall.'),
            (0.4, st.warning, 'Medium chance of rainfall.'),
        )
        for lower_bound, show_banner, banner_text in verdict_bands:
            if proba >= lower_bound:
                show_banner(banner_text)
                break
        else:
            st.info('Low chance of rainfall.')

        # Collapsible view of exactly what was fed to the model.
        debug_panel = st.expander('Show model input (debug)')
        debug_panel.write('Used feature columns (ordered):')
        debug_panel.write(feature_names)
        debug_panel.dataframe(X)

        # Tell the user about built columns the model does not consume.
        if extra_cols:
            st.caption(f'Note: These columns were ignored (not in feature_names): {extra_cols}')

    except Exception as exc:
        # UI boundary: show any failure in the page rather than crashing.
        st.error(f'Prediction failed: {exc}')