darly9991 committed on
Commit
36ec84c
·
verified ·
1 Parent(s): 72ff765

Update water_quality_index.py

Browse files
Files changed (1) hide show
  1. water_quality_index.py +161 -114
water_quality_index.py CHANGED
@@ -1,114 +1,161 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import joblib
5
- import plotly.express as px
6
- import base64
7
- from sklearn.preprocessing import LabelEncoder
8
-
9
- def run():
10
- # === Load Trained Objects ===
11
- svc_model = joblib.load("svc_model.pkl")
12
- xgb_model = joblib.load("xgb_model.pkl")
13
- imputer = joblib.load("imputer.pkl")
14
- scaler = joblib.load("scaler.pkl")
15
- label_encoder = joblib.load("label_encoder.pkl") # already fitted
16
-
17
- # === Expected Columns ===
18
- feature_cols = [
19
- "pH (Potential Hydrogen)",
20
- "BOD (Biological Oxygen Demand) (mg/L)",
21
- "COD (Chemical Oxygen Demand) (mg/L)",
22
- "TSS (Total Suspended Solid) (mg/L)",
23
- "DO (Dissolved Oxygen) (mg/L)",
24
- "NO3N (Nitrat) (mg/L)",
25
- "Total Phosphat (mg/L)",
26
- "Fecal Coliform (MPN/100 mL)"
27
- ]
28
-
29
- # === Streamlit Setup ===
30
- st.set_page_config(page_title="Water Quality Classifier Dashboard", layout="wide")
31
- st.title("💧 Water Quality Prediction and Dashboard")
32
-
33
- # === Model Selector ===
34
- model_choice = st.selectbox("Select Model", ["SVC + SMOTETomek", "XGBoost + SMOTETomek"])
35
- model = svc_model if model_choice == "SVC + SMOTETomek" else xgb_model
36
-
37
- # === Input Section ===
38
- st.header("📥 Input Data")
39
- data_option = st.radio("Choose Input Method", ["Upload CSV", "Manual Entry"])
40
- input_df = None
41
-
42
- if data_option == "Upload CSV":
43
- uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
44
- if uploaded_file:
45
- df = pd.read_csv(uploaded_file)
46
- missing_cols = [col for col in feature_cols if col not in df.columns]
47
- if missing_cols:
48
- st.error(f"Missing required columns: {missing_cols}")
49
- else:
50
- input_df = df[feature_cols]
51
- else:
52
- with st.form("manual_form"):
53
- ph = st.number_input("pH", min_value=0.0, max_value=14.0, value=7.0)
54
- bod = st.number_input("BOD (mg/L)", min_value=0.0, max_value=100.0, value=2.0)
55
- cod = st.number_input("COD (mg/L)", min_value=0.0, max_value=500.0, value=10.0)
56
- tss = st.number_input("TSS (mg/L)", min_value=0.0, max_value=1000.0, value=20.0)
57
- do = st.number_input("DO (mg/L)", min_value=0.0, max_value=20.0, value=5.0)
58
- no3 = st.number_input("NO3N (mg/L)", min_value=0.0, max_value=10.0, value=1.0)
59
- tp = st.number_input("Total Phosphat (mg/L)", min_value=0.0, max_value=10.0, value=0.1)
60
- fecal = st.number_input("Fecal Coliform (MPN/100 mL)", min_value=0.0, max_value=1000000.0, value=500.0)
61
- submitted = st.form_submit_button("Predict")
62
-
63
- if submitted:
64
- input_df = pd.DataFrame([{
65
- "pH (Potential Hydrogen)": ph,
66
- "BOD (Biological Oxygen Demand) (mg/L)": bod,
67
- "COD (Chemical Oxygen Demand) (mg/L)": cod,
68
- "TSS (Total Suspended Solid) (mg/L)": tss,
69
- "DO (Dissolved Oxygen) (mg/L)": do,
70
- "NO3N (Nitrat) (mg/L)": no3,
71
- "Total Phosphat (mg/L)": tp,
72
- "Fecal Coliform (MPN/100 mL)": fecal
73
- }])
74
-
75
- # === Prediction Section ===
76
- if input_df is not None:
77
- st.header("🔍 Prediction Results")
78
-
79
- try:
80
- # Preprocessing
81
- X_imp = imputer.transform(input_df)
82
- X_scaled = scaler.transform(X_imp)
83
-
84
- # Prediction
85
- y_proba = model.predict_proba(X_scaled)
86
- y_pred = model.predict(X_scaled)
87
- pred_class = label_encoder.inverse_transform(y_pred)[0]
88
-
89
- st.markdown(f"### 🧪 Predicted Class: `{pred_class}`")
90
-
91
- fig_pie = px.pie(
92
- names=label_encoder.classes_,
93
- values=y_proba[0],
94
- title="Prediction Probability per Class",
95
- color_discrete_sequence=px.colors.qualitative.Set3
96
- )
97
- st.plotly_chart(fig_pie, use_container_width=True)
98
-
99
- # Export
100
- st.subheader("📤 Download Prediction")
101
- input_df["Predicted Class"] = pred_class
102
- input_df[[f"Prob_{cls}" for cls in label_encoder.classes_]] = y_proba
103
- csv = input_df.to_csv(index=False)
104
- b64 = base64.b64encode(csv.encode()).decode()
105
- href = f'<a href="data:file/csv;base64,{b64}" download="prediction_result.csv">Download CSV File</a>'
106
- st.markdown(href, unsafe_allow_html=True)
107
-
108
- except Exception as e:
109
- st.error(f"Prediction failed: {e}")
110
-
111
- # === Footer ===
112
- st.markdown("---")
113
- st.caption("Developed with ❤️ for real-time water quality analysis")
114
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import plotly.express as px
6
+ import base64
7
+ from sklearn.preprocessing import LabelEncoder
8
+
9
# === Thresholds for Rule-Based Classification ===
# Default limits used by categorize_sample(). pH is an allowed band, DO is a
# minimum, and every other entry is a maximum.
thresholds = {
    'pH_min': 6.0, 'pH_max': 9.0,
    'BOD': 3.0,
    'COD': 25.0,
    'TSS': 50.0,
    'DO': 4.0,
    'Nitrate': 10.0,
    'Phosphate': 0.2,
    'FecalColiform': 1000
}


def categorize_sample(row, limits=None):
    """Classify one water sample against fixed parameter limits.

    Args:
        row: Mapping-like object (dict, pandas Series, ...) indexed by the
            eight full measurement column names read below.
        limits: Optional mapping with the same keys as ``thresholds``.
            Defaults to the module-level ``thresholds``.

    Returns:
        A ``(violations, label)`` tuple of strings. ``violations`` is the
        comma-separated list of every triggered category and ``label`` is the
        highest-priority one (Chemical > Biological > Eutrophication). Both
        are ``"Safe"`` when no category is triggered.

    Raises:
        KeyError: If ``row`` or ``limits`` lacks a required key.
    """
    if limits is None:
        limits = thresholds

    ph = row['pH (Potential Hydrogen)']
    bod = row['BOD (Biological Oxygen Demand) (mg/L)']
    cod = row['COD (Chemical Oxygen Demand) (mg/L)']
    dissolved_oxygen = row['DO (Dissolved Oxygen) (mg/L)']
    nitrate = row['NO3N (Nitrat) (mg/L)']
    phosphate = row['Total Phosphat (mg/L)']
    fecal = row['Fecal Coliform (MPN/100 mL)']
    tss = row['TSS (Total Suspended Solid) (mg/L)']

    # Fast path: every parameter is inside its limit.
    within_limits = (
        limits['pH_min'] <= ph <= limits['pH_max']
        and bod <= limits['BOD']
        and cod <= limits['COD']
        and dissolved_oxygen >= limits['DO']
        and nitrate <= limits['Nitrate']
        and phosphate <= limits['Phosphate']
        and fecal <= limits['FecalColiform']
        and tss <= limits['TSS']
    )
    if within_limits:
        return "Safe", "Safe"

    # Excess suspended solids counts towards all three categories.
    tss_high = tss > limits['TSS']

    # Categories are appended in priority order, so the first entry is the
    # headline label.
    categories = []
    # COD only flags "Chemical" beyond 1.5x its limit.
    if (
        cod > limits['COD'] * 1.5
        or ph < limits['pH_min']
        or ph > limits['pH_max']
        or tss_high
    ):
        categories.append("Chemical")
    if (
        bod > limits['BOD']
        or dissolved_oxygen < limits['DO']
        or fecal > limits['FecalColiform']
        or tss_high
    ):
        categories.append("Biological")
    if nitrate > limits['Nitrate'] or phosphate > limits['Phosphate'] or tss_high:
        categories.append("Eutrophication")

    if not categories:
        # Reached when the only deviation is COD between 1x and 1.5x its
        # limit, or when a reading is NaN (NaN fails every comparison, so it
        # never registers as a violation).
        # NOTE(review): confirm that reporting "Safe" here is intended.
        return "Safe", "Safe"

    return ", ".join(categories), categories[0]
57
+
58
# === Streamlit App ===
def run():
    """Render the water-quality dashboard.

    Loads the fitted models and preprocessors, collects samples from an
    uploaded CSV or a manual form, then shows the ML prediction and the
    rule-based evaluation side by side and offers the combined result as a
    CSV download. Every sample is evaluated individually; the headline and
    pie chart describe the first sample, and batches also get a full table.
    """
    # Fitted artefacts are read from the working directory on each rerun.
    # NOTE(review): joblib.load unpickles its input; only ship trusted files.
    svc_model = joblib.load("svc_model.pkl")
    xgb_model = joblib.load("xgb_model.pkl")
    imputer = joblib.load("imputer.pkl")
    scaler = joblib.load("scaler.pkl")
    label_encoder = joblib.load("label_encoder.pkl")

    # Column names, in the order they are passed to the imputer and scaler.
    feature_cols = [
        "pH (Potential Hydrogen)",
        "BOD (Biological Oxygen Demand) (mg/L)",
        "COD (Chemical Oxygen Demand) (mg/L)",
        "TSS (Total Suspended Solid) (mg/L)",
        "DO (Dissolved Oxygen) (mg/L)",
        "NO3N (Nitrat) (mg/L)",
        "Total Phosphat (mg/L)",
        "Fecal Coliform (MPN/100 mL)",
    ]

    st.set_page_config(page_title="Water Quality Classifier Dashboard", layout="wide")
    st.title("💧 Water Quality Prediction and Rule-Based Evaluation")

    model_choice = st.selectbox("Select Model", ["SVC + SMOTETomek", "XGBoost + SMOTETomek"])
    model = svc_model if model_choice == "SVC + SMOTETomek" else xgb_model

    st.header("📥 Input Data")
    data_option = st.radio("Choose Input Method", ["Upload CSV", "Manual Entry"])
    input_df = None

    if data_option == "Upload CSV":
        uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
        if uploaded_file:
            df = pd.read_csv(uploaded_file)
            missing_cols = [col for col in feature_cols if col not in df.columns]
            if missing_cols:
                st.error(f"Missing required columns: {missing_cols}")
            elif df.empty:
                # Report an empty upload directly rather than as a
                # preprocessing failure further down.
                st.warning("The uploaded CSV contains no rows.")
            else:
                # Copy so the uploaded frame is never touched by later work.
                input_df = df[feature_cols].copy()
    else:
        with st.form("manual_form"):
            ph = st.number_input("pH", min_value=0.0, max_value=14.0, value=7.0)
            bod = st.number_input("BOD (mg/L)", min_value=0.0, max_value=100.0, value=2.0)
            cod = st.number_input("COD (mg/L)", min_value=0.0, max_value=500.0, value=10.0)
            tss = st.number_input("TSS (mg/L)", min_value=0.0, max_value=1000.0, value=20.0)
            do = st.number_input("DO (mg/L)", min_value=0.0, max_value=20.0, value=5.0)
            no3 = st.number_input("NO3N (mg/L)", min_value=0.0, max_value=10.0, value=1.0)
            tp = st.number_input("Total Phosphat (mg/L)", min_value=0.0, max_value=10.0, value=0.1)
            fecal = st.number_input("Fecal Coliform (MPN/100 mL)", min_value=0.0, max_value=1000000.0, value=500.0)
            submitted = st.form_submit_button("Predict")

        if submitted:
            input_df = pd.DataFrame([{
                "pH (Potential Hydrogen)": ph,
                "BOD (Biological Oxygen Demand) (mg/L)": bod,
                "COD (Chemical Oxygen Demand) (mg/L)": cod,
                "TSS (Total Suspended Solid) (mg/L)": tss,
                "DO (Dissolved Oxygen) (mg/L)": do,
                "NO3N (Nitrat) (mg/L)": no3,
                "Total Phosphat (mg/L)": tp,
                "Fecal Coliform (MPN/100 mL)": fecal,
            }])

    if input_df is not None:
        st.header("🔍 Prediction Results")

        # UI boundary: any failure in the pipeline is reported to the user
        # instead of crashing the page.
        try:
            X_imp = imputer.transform(input_df)
            X_scaled = scaler.transform(X_imp)
            y_proba = np.asarray(model.predict_proba(X_scaled))
            y_pred = model.predict(X_scaled)
            pred_classes = label_encoder.inverse_transform(y_pred)

            # Rule-Based Evaluation: one (violations, label) pair per sample,
            # computed on the raw (un-imputed) readings.
            rule_results = [categorize_sample(sample) for _, sample in input_df.iterrows()]

            # Assemble per-row results on a copy so input_df stays pristine.
            result_df = input_df.copy()
            result_df["Predicted Class (ML)"] = pred_classes
            result_df["Rule-Based Class"] = [label for _, label in rule_results]
            result_df["Rule-Based Violations"] = [violations for violations, _ in rule_results]
            # Probability columns follow the encoder's class order, which is
            # assumed to match the model's output column order.
            for idx, cls in enumerate(label_encoder.classes_):
                result_df[f"Prob_{cls}"] = y_proba[:, idx]

            # Display results: the headline and chart describe the first sample.
            pred_class = pred_classes[0]
            rule_violations, rule_label = rule_results[0]
            is_batch = len(result_df) > 1
            if is_batch:
                st.caption(
                    f"Headline and chart show the first of {len(result_df)} samples; "
                    "all samples are listed in the table and the download below."
                )

            st.markdown(f"### 🧪 ML Predicted Class: `{pred_class}`")
            st.markdown(f"### 📏 Rule-Based Class: `{rule_label}`")
            st.markdown(f"**Violations Detected:** {rule_violations}")

            fig_pie = px.pie(
                names=label_encoder.classes_,
                values=y_proba[0],
                title="Prediction Probability per Class",
                color_discrete_sequence=px.colors.qualitative.Set3,
            )
            st.plotly_chart(fig_pie, use_container_width=True)

            if is_batch:
                st.dataframe(result_df)

            # Export Results as an inline base64 data link.
            csv = result_df.to_csv(index=False)
            b64 = base64.b64encode(csv.encode()).decode()
            href = f'<a href="data:file/csv;base64,{b64}" download="prediction_result.csv">Download CSV File</a>'
            st.subheader("📤 Download Result")
            st.markdown(href, unsafe_allow_html=True)

        except Exception as e:
            st.error(f"Prediction failed: {e}")

    st.markdown("---")
    st.caption("Developed with ❤️ for integrated ML + expert rule water quality system")