Spaces:

EnYa32
/

RainfallPredictionClassification

Sleeping

App Files Files Community

RainfallPredictionClassification / src /streamlit_app.py

EnYa32

Update src/streamlit_app.py

3547053 verified 2 months ago

raw

history blame contribute delete

4.62 kB

	import numpy as np
	import pandas as pd
	import streamlit as st
	import joblib
	from pathlib import Path

	# -------------------------------
	# Page setup: browser-tab metadata, heading and intro text
	# -------------------------------
	st.set_page_config(
	    layout='centered',
	    page_icon='🌧️',
	    page_title='Rainfall Probability Predictor (LogReg)',
	)

	st.title('🌧️ Rainfall Probability Predictor')
	st.write(
	    'Predicts the probability of rainfall (0–1) using a Logistic Regression model trained on weather features.'
	)

	# Both model artifacts are looked up in the directory containing this script.
	BASE_DIR = Path(__file__).resolve().parent
	MODEL_PATH = BASE_DIR.joinpath('lr_final_model.pkl')
	FEATURE_NAMES_PATH = BASE_DIR.joinpath('feature_names.pkl')


	@st.cache_resource
	def load_artifacts():
	    """Load the trained model and its ordered feature-name list from disk.

	    Both artifacts are read with ``joblib.load`` from ``MODEL_PATH`` and
	    ``FEATURE_NAMES_PATH``. The function is decorated with
	    ``st.cache_resource``.

	    Returns:
	        tuple: ``(model, feature_names)`` where ``feature_names`` is a
	        non-empty ``list`` of column names.

	    Raises:
	        FileNotFoundError: If either artifact file does not exist.
	        ValueError: If the feature-names artifact is not a non-empty
	            list of column names (after list-like normalisation).
	    """
	    # Check the model first, then the feature names, and report the folder
	    # that was actually searched so the hint is correct wherever the script
	    # lives.
	    for label, path in (('Model', MODEL_PATH), ('Feature names', FEATURE_NAMES_PATH)):
	        if not path.exists():
	            raise FileNotFoundError(
	                f'{label} not found: {path.name}. '
	                f'Put it next to this script, in {path.parent}.'
	            )

	    # NOTE: joblib.load unpickles arbitrary objects; only ship trusted artifacts.
	    model = joblib.load(MODEL_PATH)
	    feature_names = joblib.load(FEATURE_NAMES_PATH)

	    # Column names are often pickled as a pandas Index / ndarray / tuple;
	    # accept those by converting them to a plain list.
	    if isinstance(feature_names, (pd.Index, np.ndarray)):
	        feature_names = feature_names.tolist()
	    elif isinstance(feature_names, tuple):
	        feature_names = list(feature_names)

	    if not isinstance(feature_names, list) or not feature_names:
	        raise ValueError('feature_names.pkl must contain a non-empty list of column names.')

	    return model, feature_names


	def add_features(df: pd.DataFrame) -> pd.DataFrame:
	    """Return a copy of ``df`` extended with the engineered weather features.

	    Reads the columns ``maxtemp``, ``mintemp``, ``humidity``, ``dewpoint``,
	    ``sunshine``, ``cloud``, ``windspeed``, ``winddirection`` and ``day`` and
	    adds ``temp_range``, ``humidity_gap``, ``sunshine_ratio``,
	    ``wind_energy``, ``sin_day`` and ``cos_day`` (in that order). The input
	    frame itself is not modified.
	    """
	    # Spread between the daily maximum and minimum temperature.
	    temp_range = df['maxtemp'] - df['mintemp']
	    # Difference between humidity and dewpoint.
	    humidity_gap = df['humidity'] - df['dewpoint']
	    # Sunshine relative to cloud cover; the +1 shifts the denominator.
	    sunshine_ratio = df['sunshine'] / (df['cloud'] + 1)
	    # Simple interaction of wind speed and wind direction.
	    wind_energy = df['windspeed'] * df['winddirection']
	    # Position of the day on a 365-step cycle, expressed as an angle.
	    day_angle = 2 * np.pi * df['day'] / 365

	    # assign() works on a copy, so the caller's frame stays untouched.
	    return df.assign(
	        temp_range=temp_range,
	        humidity_gap=humidity_gap,
	        sunshine_ratio=sunshine_ratio,
	        wind_energy=wind_energy,
	        sin_day=np.sin(day_angle),
	        cos_day=np.cos(day_angle),
	    )


	# Load the model and the ordered list of feature columns it expects.
	model, feature_names = load_artifacts()

	st.subheader('Input features')

	# Note: ranges are generic. If you want, you can set them based on df1.describe().
	col1, col2 = st.columns(2)

	with col1:
	    day = st.number_input('day (1–365)', min_value=1, max_value=365, value=100, step=1)
	    pressure = st.number_input('pressure', value=1013.0, step=0.1)
	    maxtemp = st.number_input('maxtemp', value=20.0, step=0.1)
	    temperature = st.number_input('temperature', value=15.0, step=0.1)
	    mintemp = st.number_input('mintemp', value=10.0, step=0.1)

	with col2:
	    dewpoint = st.number_input('dewpoint', value=8.0, step=0.1)
	    humidity = st.number_input('humidity', value=70.0, step=0.1)
	    cloud = st.number_input('cloud', value=50.0, step=1.0)
	    sunshine = st.number_input('sunshine', value=5.0, step=0.1)
	    windspeed = st.number_input('windspeed', value=10.0, step=0.1)
	    winddirection = st.number_input('winddirection', value=180.0, step=1.0)

	# Build one-row dataframe with the ORIGINAL base features.
	# NOTE(review): the key 'temparature' is spelled differently from the widget
	# label; presumably it matches the training column name stored in
	# feature_names.pkl - verify before "correcting" it.
	input_df = pd.DataFrame([{
	    'day': float(day),
	    'pressure': float(pressure),
	    'maxtemp': float(maxtemp),
	    'temparature': float(temperature),
	    'mintemp': float(mintemp),
	    'dewpoint': float(dewpoint),
	    'humidity': float(humidity),
	    'cloud': float(cloud),
	    'sunshine': float(sunshine),
	    'windspeed': float(windspeed),
	    'winddirection': float(winddirection),
	}])

	# Add engineered features (must match training)
	input_df = add_features(input_df)

	# Compare the columns we built against the columns the model expects.
	missing_cols = [c for c in feature_names if c not in input_df.columns]
	extra_cols = [c for c in input_df.columns if c not in feature_names]

	if missing_cols:
	    st.error(f'Missing required feature columns: {missing_cols}')
	    st.stop()

	# Keep only the expected columns in the correct order
	X = input_df[feature_names].copy()

	st.divider()

	if st.button('Predict rainfall probability'):
	    # Engineered ratios can become inf/NaN (e.g. cloud = -1 makes the
	    # sunshine_ratio denominator zero). Report that explicitly instead of
	    # letting it surface as an opaque estimator error.
	    finite_mask = np.isfinite(X.to_numpy(dtype=float)).all(axis=0)
	    non_finite_cols = [c for c, ok in zip(X.columns, finite_mask) if not ok]
	    if non_finite_cols:
	        st.error(
	            f'Cannot predict: non-finite values in {non_finite_cols}. '
	            'Check the inputs (e.g. cloud = -1 makes sunshine_ratio divide by zero).'
	        )
	        st.stop()

	    # Top-level UI boundary: any failure is shown to the user, not raised.
	    try:
	        # Column 1 of predict_proba is read as the rainfall probability.
	        proba = float(model.predict_proba(X)[:, 1][0])
	        st.metric('Rainfall probability', f'{proba:.3f}', delta=None)
	        # Clamp to [0, 1] before handing the value to the progress bar.
	        st.progress(min(max(proba, 0.0), 1.0))

	        if proba >= 0.7:
	            st.success('High chance of rainfall.')
	        elif proba >= 0.4:
	            st.warning('Medium chance of rainfall.')
	        else:
	            st.info('Low chance of rainfall.')

	        with st.expander('Show model input (debug)'):
	            st.write('Used feature columns (ordered):')
	            st.write(feature_names)
	            st.dataframe(X)

	        if extra_cols:
	            st.caption(f'Note: These columns were ignored (not in feature_names): {extra_cols}')

	    except Exception as e:
	        st.error(f'Prediction failed: {e}')