Adilzhan committed on
Commit
c49b726
·
1 Parent(s): 2b4c8b2

Add exoplanet classification Gradio app with LFS

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.pem filter=lfs diff=lfs merge=lfs -text
.gradio/certificate.pem ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c99356c265ee06c0ae0502e74d38231263513726d001cfe28ea25e70af2cc7f
3
+ size 1970
__pycache__/app.cpython-311.pyc ADDED
Binary file (48.2 kB). View file
 
app.py ADDED
@@ -0,0 +1,818 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Exoplanet Classification Web Application
3
+ ==========================================
4
+ Modern Gradio interface for TESS exoplanet classification
5
+ Features individual parameter inputs and batch processing
6
+
7
+ Author: ML Pipeline
8
+ Date: October 4, 2025
9
+ """
10
+
11
+ import warnings
12
+ warnings.filterwarnings('ignore')
13
+
14
+ import gradio as gr
15
+ import pandas as pd
16
+ import numpy as np
17
+ import joblib
18
+ import json
19
+ from pathlib import Path
20
+ from datetime import datetime
21
+ import traceback
22
+
23
+ # ====================================================================
24
+ # CONFIGURATION
25
+ # ====================================================================
26
+
27
+ MODEL_DIR = Path('mutiresult')
28
+
29
+ BASE_FEATURES = [
30
+ "ra", "dec", "st_pmra", "st_pmraerr1", "st_pmraerr2", "st_pmralim",
31
+ "st_pmdec", "st_pmdecerr1", "st_pmdecerr2", "st_pmdeclim",
32
+ "pl_tranmid", "pl_tranmiderr1", "pl_tranmiderr2", "pl_tranmidlim",
33
+ "pl_orbper", "pl_orbpererr1", "pl_orbpererr2", "pl_orbperlim",
34
+ "pl_trandurh", "pl_trandurherr1", "pl_trandurherr2", "pl_trandurhlim",
35
+ "pl_trandep", "pl_trandeperr1", "pl_trandeperr2", "pl_trandeplim",
36
+ "pl_rade", "pl_radeerr1", "pl_radeerr2", "pl_radelim",
37
+ "pl_insol", "pl_eqt",
38
+ "st_tmag", "st_tmagerr1", "st_tmagerr2", "st_tmaglim",
39
+ "st_dist", "st_disterr1", "st_disterr2", "st_distlim",
40
+ "st_teff", "st_tefferr1", "st_tefferr2", "st_tefflim",
41
+ "st_logg", "st_loggerr1", "st_loggerr2", "st_logglim",
42
+ "st_rad", "st_raderr1", "st_raderr2", "st_radlim"
43
+ ]
44
+
45
+ CLASS_DESCRIPTIONS = {
46
+ 'APC': 'Ambiguous Planet Candidate',
47
+ 'CP': 'Confirmed Planet',
48
+ 'FA': 'False Alarm',
49
+ 'FP': 'False Positive',
50
+ 'KP': 'Known Planet',
51
+ 'PC': 'Planet Candidate'
52
+ }
53
+
54
+ # Comprehensive parameter descriptions
55
+ PARAM_INFO = {
56
+ 'ra': ('Right Ascension (RA)', '0-360°. Celestial longitude coordinate of the star'),
57
+ 'dec': ('Declination (Dec)', '-90 to +90°. Celestial latitude coordinate of the star'),
58
+ 'st_pmra': ('Proper Motion in RA', 'mas/yr. Star movement in RA direction'),
59
+ 'st_pmraerr1': ('PM RA Upper Error', 'mas/yr. Positive uncertainty'),
60
+ 'st_pmraerr2': ('PM RA Lower Error', 'mas/yr. Negative uncertainty'),
61
+ 'st_pmralim': ('PM RA Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
62
+ 'st_pmdec': ('Proper Motion in Dec', 'mas/yr. Star movement in Dec direction'),
63
+ 'st_pmdecerr1': ('PM Dec Upper Error', 'mas/yr. Positive uncertainty'),
64
+ 'st_pmdecerr2': ('PM Dec Lower Error', 'mas/yr. Negative uncertainty'),
65
+ 'st_pmdeclim': ('PM Dec Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
66
+ 'pl_tranmid': ('Transit Midpoint Time', 'BJD. Time when planet crosses star center'),
67
+ 'pl_tranmiderr1': ('Transit Mid Upper Error', 'days. Positive uncertainty'),
68
+ 'pl_tranmiderr2': ('Transit Mid Lower Error', 'days. Negative uncertainty'),
69
+ 'pl_tranmidlim': ('Transit Mid Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
70
+ 'pl_orbper': ('Orbital Period', 'days. Time for one complete orbit'),
71
+ 'pl_orbpererr1': ('Orbital Period Upper Error', 'days. Positive uncertainty'),
72
+ 'pl_orbpererr2': ('Orbital Period Lower Error', 'days. Negative uncertainty'),
73
+ 'pl_orbperlim': ('Orbital Period Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
74
+ 'pl_trandurh': ('Transit Duration', 'hours. How long planet blocks star'),
75
+ 'pl_trandurherr1': ('Transit Duration Upper Error', 'hours. Positive uncertainty'),
76
+ 'pl_trandurherr2': ('Transit Duration Lower Error', 'hours. Negative uncertainty'),
77
+ 'pl_trandurhlim': ('Transit Duration Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
78
+ 'pl_trandep': ('Transit Depth', '%. Fraction of starlight blocked'),
79
+ 'pl_trandeperr1': ('Transit Depth Upper Error', '%. Positive uncertainty'),
80
+ 'pl_trandeperr2': ('Transit Depth Lower Error', '%. Negative uncertainty'),
81
+ 'pl_trandeplim': ('Transit Depth Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
82
+ 'pl_rade': ('Planet Radius', 'Earth radii (R⊕). Planet size vs Earth'),
83
+ 'pl_radeerr1': ('Planet Radius Upper Error', 'R⊕. Positive uncertainty'),
84
+ 'pl_radeerr2': ('Planet Radius Lower Error', 'R⊕. Negative uncertainty'),
85
+ 'pl_radelim': ('Planet Radius Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
86
+ 'pl_insol': ('Insolation Flux', 'Earth flux. Stellar energy hitting planet'),
87
+ 'pl_eqt': ('Equilibrium Temperature', 'Kelvin. Expected planet surface temperature'),
88
+ 'st_tmag': ('TESS Magnitude', 'mag. Star brightness (lower=brighter, typical 6-16)'),
89
+ 'st_tmagerr1': ('TESS Mag Upper Error', 'mag. Positive uncertainty'),
90
+ 'st_tmagerr2': ('TESS Mag Lower Error', 'mag. Negative uncertainty'),
91
+ 'st_tmaglim': ('TESS Mag Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
92
+ 'st_dist': ('Distance', 'parsecs. Distance to star (1 pc ≈ 3.26 light-years)'),
93
+ 'st_disterr1': ('Distance Upper Error', 'parsecs. Positive uncertainty'),
94
+ 'st_disterr2': ('Distance Lower Error', 'parsecs. Negative uncertainty'),
95
+ 'st_distlim': ('Distance Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
96
+ 'st_teff': ('Effective Temperature', 'Kelvin. Star surface temp (Sun ≈ 5778K)'),
97
+ 'st_tefferr1': ('Effective Temp Upper Error', 'K. Positive uncertainty'),
98
+ 'st_tefferr2': ('Effective Temp Lower Error', 'K. Negative uncertainty'),
99
+ 'st_tefflim': ('Effective Temp Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
100
+ 'st_logg': ('Surface Gravity', 'log₁₀(cm/s²). Stellar gravity (Sun ≈ 4.44)'),
101
+ 'st_loggerr1': ('Surface Gravity Upper Error', 'dex. Positive uncertainty'),
102
+ 'st_loggerr2': ('Surface Gravity Lower Error', 'dex. Negative uncertainty'),
103
+ 'st_logglim': ('Surface Gravity Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
104
+ 'st_rad': ('Stellar Radius', 'Solar radii (R☉). Star size vs Sun'),
105
+ 'st_raderr1': ('Stellar Radius Upper Error', 'R☉. Positive uncertainty'),
106
+ 'st_raderr2': ('Stellar Radius Lower Error', 'R☉. Negative uncertainty'),
107
+ 'st_radlim': ('Stellar Radius Limit Flag', '0=measured, 1=upper limit, -1=lower limit'),
108
+ }
109
+
110
+ # ====================================================================
111
+ # MODEL LOADING
112
+ # ====================================================================
113
+
114
class ModelPipeline:
    """Load persisted preprocessing artifacts and classifiers, and run inference.

    On construction the pipeline reads, from ``model_dir``:
    ``scaler.pkl``, ``imputer.pkl``, ``label_encoder.pkl``,
    ``feature_info.json``, ``metrics_summary.json`` and every
    ``model_*.pkl`` file listed in ``_load_artifacts`` that exists on disk.

    Attributes:
        model_dir: Directory (as ``Path``) the artifacts are read from.
        models: Mapping of model display name -> loaded estimator.
        scaler / imputer / label_encoder: Objects loaded with joblib.
        feature_info: Parsed ``feature_info.json`` (must contain
            ``'selected_features'``, which ``preprocess`` reads).
        metrics: Parsed ``metrics_summary.json``.
    """

    # Ensemble members and their voting weights. Keeping the weight next to
    # the model name guarantees the two stay aligned even when some members
    # are not loaded (dict order is the evaluation order).
    ENSEMBLE_WEIGHTS = {
        'LightGBM': 1.5,
        'XGBoost': 1.5,
        'RandomForest': 1.0,
        'ExtraTrees': 1.0,
    }

    def __init__(self, model_dir):
        """Remember ``model_dir`` and eagerly load every artifact from it.

        Raises:
            Exception: whatever ``_load_artifacts`` re-raises when a required
                artifact cannot be read.
        """
        self.model_dir = Path(model_dir)
        self.models = {}
        self.scaler = None
        self.imputer = None
        self.label_encoder = None
        self.feature_info = None
        self.metrics = None
        self._load_artifacts()

    def _load_artifacts(self):
        """Read preprocessing objects, metadata JSON and model files from disk.

        Model files that do not exist are skipped; the preprocessing objects
        and both JSON files are required. Any failure is logged and re-raised.
        """
        try:
            print("Loading preprocessing artifacts...")
            # NOTE(review): joblib.load unpickles arbitrary objects; only
            # point model_dir at artifacts you produced yourself.
            self.scaler = joblib.load(self.model_dir / 'scaler.pkl')
            self.imputer = joblib.load(self.model_dir / 'imputer.pkl')
            self.label_encoder = joblib.load(self.model_dir / 'label_encoder.pkl')

            with open(self.model_dir / 'feature_info.json', 'r') as f:
                self.feature_info = json.load(f)

            with open(self.model_dir / 'metrics_summary.json', 'r') as f:
                self.metrics = json.load(f)

            print("Loading trained models...")
            model_files = {
                'LightGBM': 'model_lightgbm.pkl',
                'XGBoost': 'model_xgboost.pkl',
                'RandomForest': 'model_randomforest.pkl',
                'ExtraTrees': 'model_extratrees.pkl',
                'LogisticRegression': 'model_logisticregression.pkl'
            }

            for name, filename in model_files.items():
                model_path = self.model_dir / filename
                if model_path.exists():
                    self.models[name] = joblib.load(model_path)
                    print(f"  ✓ Loaded {name}")

            print(f"\n✓ Successfully loaded {len(self.models)} models")
            print(f"✓ Target classes: {list(self.label_encoder.classes_)}")

        except Exception as e:
            print(f"Error loading artifacts: {str(e)}")
            raise

    def create_features(self, df):
        """Return a copy of ``df`` with the engineered feature columns appended.

        Each group of derived columns is only added when its source columns
        are present. Afterwards infinities are turned into NaN and every
        column containing NaN is filled with that column's median.

        Args:
            df: DataFrame of (already imputed) base features.

        Returns:
            A new DataFrame; ``df`` itself is not modified.
        """
        X = df.copy()

        if 'st_rad' in X.columns and 'st_teff' in X.columns:
            X['luminosity_proxy'] = X['st_rad']**2 * X['st_teff']**4

        # The 1e-10 terms below guard the divisions against a zero denominator.
        if 'pl_rade' in X.columns and 'st_rad' in X.columns:
            X['planet_star_radius_ratio'] = X['pl_rade'] / (X['st_rad'] + 1e-10)
            X['transit_depth_proxy'] = (X['pl_rade'] / (X['st_rad'] + 1e-10))**2

        if 'st_dist' in X.columns:
            X['log_distance'] = np.log1p(X['st_dist'])
            X['inv_distance'] = 1 / (X['st_dist'] + 1e-10)

        if 'pl_eqt' in X.columns and 'st_teff' in X.columns:
            X['temp_ratio'] = X['pl_eqt'] / (X['st_teff'] + 1e-10)

        if 'pl_insol' in X.columns:
            X['log_insol'] = np.log1p(X['pl_insol'])
            X['sqrt_insol'] = np.sqrt(X['pl_insol'] + 1e-10)

        if 'pl_orbper' in X.columns:
            X['log_orbper'] = np.log1p(X['pl_orbper'])

        error_pairs = [
            ('st_pmra', 'st_pmraerr1'),
            ('st_pmdec', 'st_pmdecerr1'),
            ('pl_rade', 'pl_radeerr1'),
        ]

        for base, error in error_pairs:
            if base in X.columns and error in X.columns:
                X[f'{base}_error_ratio'] = np.abs(X[error]) / (np.abs(X[base]) + 1e-10)

        # Row-wise summary statistics over the first ten base features present.
        numeric_cols = [col for col in BASE_FEATURES[:10] if col in X.columns]
        if len(numeric_cols) > 3:
            X['feature_mean'] = X[numeric_cols].mean(axis=1)
            X['feature_std'] = X[numeric_cols].std(axis=1)
            X['feature_max'] = X[numeric_cols].max(axis=1)
            X['feature_min'] = X[numeric_cols].min(axis=1)

        X = X.replace([np.inf, -np.inf], np.nan)
        for col in X.columns:
            if X[col].isnull().any():
                # Assign the result back instead of `X[col].fillna(..., inplace=True)`:
                # the chained in-place form operates on a temporary and is a no-op
                # under pandas Copy-on-Write.
                X[col] = X[col].fillna(X[col].median())

        return X

    def preprocess(self, df):
        """Preprocess with correct order: impute → engineer → scale → select

        Args:
            df: DataFrame containing at least the columns the imputer was
                fitted on (``imputer.feature_names_in_``).

        Returns:
            DataFrame restricted to ``feature_info['selected_features']``,
            indexed like ``df``.
        """
        # Step 1: Apply imputer to base 52 features
        imputer_features = list(self.imputer.feature_names_in_)
        X_base = df[imputer_features].copy()
        X_imputed = self.imputer.transform(X_base)
        X_imputed_df = pd.DataFrame(X_imputed, columns=imputer_features, index=df.index)

        # Step 2: Create engineered features (from 52 to ~68 features)
        X_engineered = self.create_features(X_imputed_df)

        # Step 3: Apply scaler to ALL engineered features (68 features)
        # IMPORTANT: Scaler was fitted on all engineered features, not selected ones
        X_scaled = self.scaler.transform(X_engineered)
        X_scaled_df = pd.DataFrame(X_scaled, columns=X_engineered.columns, index=df.index)

        # Step 4: Select final 60 features AFTER scaling
        X_selected = X_scaled_df[self.feature_info['selected_features']]

        return X_selected

    def predict(self, df, model_name='Ensemble'):
        """Classify every row of ``df``.

        Args:
            df: Raw input DataFrame; it is passed through ``preprocess``.
            model_name: ``'Ensemble'`` for the weighted average of the loaded
                members of ``ENSEMBLE_WEIGHTS``, otherwise a key of
                ``self.models``.

        Returns:
            ``(predictions, probabilities)`` where ``predictions`` are the
            decoded class labels and ``probabilities`` the per-class
            probability matrix.

        Raises:
            ValueError: ``'Ensemble'`` was requested but none of its member
                models is loaded.
            KeyError: ``model_name`` is not a loaded model.
        """
        X = self.preprocess(df)

        if model_name == 'Ensemble':
            probabilities = []
            weights = []

            # Collect each weight together with its model so a missing member
            # cannot shift the remaining weights onto the wrong models.
            for member_name, member_weight in self.ENSEMBLE_WEIGHTS.items():
                if member_name in self.models:
                    probabilities.append(self.models[member_name].predict_proba(X))
                    weights.append(member_weight)

            if not probabilities:
                raise ValueError(
                    "No ensemble member models are loaded; cannot compute an ensemble prediction"
                )

            avg_proba = np.average(probabilities, axis=0, weights=weights)
            y_pred = np.argmax(avg_proba, axis=1)
            predictions = self.label_encoder.inverse_transform(y_pred)

            return predictions, avg_proba
        else:
            model = self.models[model_name]
            y_pred = model.predict(X)
            y_proba = model.predict_proba(X)
            predictions = self.label_encoder.inverse_transform(y_pred)

            return predictions, y_proba
254
+
255
+
256
+ print("Initializing Model Pipeline...")
257
+ pipeline = ModelPipeline(MODEL_DIR)
258
+
259
+ # ====================================================================
260
+ # PREDICTION FUNCTIONS
261
+ # ====================================================================
262
+
263
def predict_single(file, model_choice):
    """Predict from single sample CSV file (must have exactly 2 rows: header + 1 data row)

    Args:
        file: Uploaded CSV, either a filesystem path string or an object whose
            ``name`` attribute is the path; ``None`` when nothing was uploaded.
        model_choice: Model name forwarded to ``pipeline.predict``.

    Returns:
        A 3-tuple of HTML strings ``(result_card, probability_bars,
        input_summary)``. On any validation failure or exception the first
        element is a message/error card and the other two are empty strings.
    """
    # Local import: values read from the uploaded CSV and exception text are
    # untrusted and must be escaped before being embedded in HTML.
    from html import escape

    try:
        if file is None:
            return "⚠️ Please upload a CSV file with 1 sample (2 rows: header + data)", "", ""

        # Accept both a plain path string and a file-like wrapper with `.name`.
        csv_path = getattr(file, 'name', file)
        df = pd.read_csv(csv_path)

        # Validate: must have exactly 1 data row
        if len(df) == 0:
            error_html = """
            <div style="background: #ffebee; border-left: 5px solid #f44336; padding: 20px; border-radius: 10px;">
            <h3 style="color: #c62828; margin-top: 0;">❌ Empty File</h3>
            <p style="color: #c62828;">CSV file has no data rows. Please upload a file with exactly 1 data row (2 total rows including header).</p>
            </div>
            """
            return error_html, "", ""

        if len(df) > 1:
            error_html = f"""
            <div style="background: #fff3e0; border-left: 5px solid #ff9800; padding: 20px; border-radius: 10px;">
            <h3 style="color: #e65100; margin-top: 0;">⚠️ Too Many Rows</h3>
            <p style="color: #e65100;">
            Your CSV file has <strong>{len(df)} data rows</strong>. For single sample prediction, the file must have exactly <strong>1 data row</strong> (2 total rows: header + data).
            </p>
            <p style="color: #e65100;">
            If you want to process multiple samples, please use the <strong>"📦 Batch Processing"</strong> tab instead.
            </p>
            </div>
            """
            return error_html, "", ""

        # Validate columns (names come from BASE_FEATURES, so they are trusted)
        missing_cols = [col for col in BASE_FEATURES if col not in df.columns]
        if len(missing_cols) > 0:
            missing_text = ', '.join(missing_cols[:30])
            error_html = f"""
            <div style="background: #ffebee; border-left: 5px solid #f44336; padding: 20px; border-radius: 10px;">
            <h3 style="color: #c62828; margin-top: 0;">❌ Missing Required Columns</h3>
            <p style="color: #c62828;">Your CSV is missing <strong>{len(missing_cols)}</strong> required columns:</p>
            <pre style="background: #fff; padding: 15px; border-radius: 5px; overflow-x: auto; color: #c62828;">{missing_text}</pre>
            <p style="color: #c62828;">Please ensure all 52 base features are present. Download the template for reference.</p>
            </div>
            """
            return error_html, "", ""

        # Get ID if exists; it is user-supplied text, so escape it for HTML
        id_value = "N/A"
        for possible_id in ['id', 'ID', 'toi', 'TOI', 'tid', 'TID']:
            if possible_id in df.columns:
                id_value = escape(str(df[possible_id].iloc[0]))
                break

        # Make prediction
        predictions, probabilities = pipeline.predict(df, model_name=model_choice)

        pred_label = predictions[0]
        pred_confidence = probabilities[0].max()

        # The metrics file may have no entry for the chosen model; a missing
        # accuracy figure must not discard a successful prediction.
        try:
            accuracy_text = f"{pipeline.metrics[model_choice]['Accuracy']:.1%}"
        except (KeyError, TypeError, ValueError):
            accuracy_text = "N/A"

        # Beautiful result card
        result_html = f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 40px; border-radius: 20px; box-shadow: 0 20px 60px rgba(0,0,0,0.3);">
        <div style="text-align: center; color: white;">
        <h1 style="margin: 0 0 30px 0; font-size: 42px; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
        🎯 Classification Result
        </h1>
        <div style="background: rgba(255,255,255,0.15); padding: 40px; border-radius: 15px;
        backdrop-filter: blur(10px); border: 2px solid rgba(255,255,255,0.2);">
        <div style="margin-bottom: 30px;">
        <div style="font-size: 18px; opacity: 0.9; margin-bottom: 10px;">Sample ID: {id_value}</div>
        <div style="font-size: 22px; opacity: 0.95; margin-bottom: 15px; letter-spacing: 2px;">
        PREDICTED CLASS
        </div>
        <div style="font-size: 72px; font-weight: bold; margin: 20px 0;
        text-shadow: 3px 3px 6px rgba(0,0,0,0.4); letter-spacing: 4px;">
        {pred_label}
        </div>
        <div style="font-size: 20px; opacity: 0.9; font-style: italic;">
        {CLASS_DESCRIPTIONS.get(pred_label, '')}
        </div>
        </div>
        <div style="padding-top: 30px; border-top: 2px solid rgba(255,255,255,0.3);">
        <div style="font-size: 18px; opacity: 0.95; margin-bottom: 15px; letter-spacing: 1px;">
        CONFIDENCE SCORE
        </div>
        <div style="font-size: 54px; font-weight: bold; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
        {pred_confidence:.1%}
        </div>
        </div>
        </div>
        <div style="margin-top: 25px; font-size: 16px; opacity: 0.85;">
        Model: {model_choice} | Accuracy: {accuracy_text}
        </div>
        </div>
        </div>
        """

        # Probability bars, most likely class first
        prob_list = [(pipeline.label_encoder.classes_[i], probabilities[0, i])
                     for i in range(len(pipeline.label_encoder.classes_))]
        prob_list.sort(key=lambda x: x[1], reverse=True)

        prob_parts = ["<div style='padding: 20px;'>"]
        for class_name, prob in prob_list:
            # The predicted class is highlighted in green, the rest in blue.
            color = "#4caf50" if class_name == pred_label else "#2196F3"
            prob_parts.append(f"""
            <div style="margin-bottom: 20px;">
            <div style="display: flex; justify-content: space-between; margin-bottom: 8px;">
            <span style="font-weight: bold; font-size: 18px;">{class_name}</span>
            <span style="font-weight: bold; font-size: 18px; color: {color};">{prob:.1%}</span>
            </div>
            <div style="font-size: 14px; color: #666; margin-bottom: 8px;">
            {CLASS_DESCRIPTIONS.get(class_name, '')}
            </div>
            <div style="background: #e0e0e0; border-radius: 10px; height: 30px; overflow: hidden;">
            <div style="background: linear-gradient(90deg, {color}, {color}aa);
            width: {prob*100}%; height: 100%; border-radius: 10px;
            transition: width 0.3s ease; display: flex; align-items: center;
            justify-content: flex-end; padding-right: 10px; color: white;
            font-weight: bold; font-size: 14px;">
            </div>
            </div>
            </div>
            """)
        prob_parts.append("</div>")
        prob_html = "".join(prob_parts)

        # Input data preview
        preview_html = f"""
        <div style="background: #f5f5f5; padding: 20px; border-radius: 10px; margin-top: 20px;">
        <h3 style="color: #333; margin-top: 0;">📋 Input Data Summary</h3>
        <p style="color: #666;"><strong>Sample ID:</strong> {id_value}</p>
        <p style="color: #666;"><strong>Features:</strong> {len(df.columns)} columns loaded</p>
        <p style="color: #666;"><strong>Model:</strong> {model_choice}</p>
        </div>
        """

        return result_html, prob_html, preview_html

    except Exception as e:
        # Exception text and traceback can echo file contents or paths; escape
        # them so they render as text instead of being interpreted as markup.
        error_html = f"""
        <div style="background: #ffebee; border-left: 5px solid #f44336; padding: 20px; border-radius: 10px;">
        <h3 style="color: #c62828; margin-top: 0;">❌ Prediction Error</h3>
        <p style="color: #c62828;">{escape(str(e))}</p>
        <pre style="background: #fff; padding: 10px; border-radius: 5px; overflow-x: auto;">{escape(traceback.format_exc())}</pre>
        </div>
        """
        return error_html, "", ""
414
+
415
+
416
def predict_batch(file, model_choice):
    """Batch prediction from CSV file

    Args:
        file: Uploaded CSV, either a filesystem path string or an object whose
            ``name`` attribute is the path; ``None`` when nothing was uploaded.
        model_choice: Model name forwarded to ``pipeline.predict``.

    Returns:
        ``(summary_markdown, detailed_results, output_file)``.
        ``detailed_results`` is a DataFrame with ID, Label, Confidence and one
        ``Prob_<class>`` column per class; ``output_file`` is the path of a
        CSV holding only the ID and Label columns. On validation failure or
        any exception the last two elements are ``None`` and the first is the
        message to show.
    """
    # Local import: only this handler needs a private scratch directory.
    import tempfile

    try:
        if file is None:
            return "⚠️ Please upload a CSV file", None, None

        # Accept both a plain path string and a file-like wrapper with `.name`.
        csv_path = getattr(file, 'name', file)
        df = pd.read_csv(csv_path)

        if len(df) == 0:
            return "⚠️ CSV file is empty", None, None

        # Validate columns
        missing_cols = [col for col in BASE_FEATURES if col not in df.columns]
        if len(missing_cols) > 0:
            # Built line by line so source indentation cannot leak into the Markdown.
            error_msg = "\n".join([
                "## ❌ Missing Required Columns",
                "",
                f"Your CSV is missing {len(missing_cols)} required columns:",
                "",
                "```",
                ', '.join(missing_cols[:20]),
                "```",
                "",
                "Please ensure all 52 base features are present in your CSV file.",
            ])
            return error_msg, None, None

        # Get ID column (first match wins)
        id_col = None
        for possible_id in ['id', 'ID', 'toi', 'TOI', 'tid', 'TID']:
            if possible_id in df.columns:
                id_col = possible_id
                break

        # Predict
        predictions, probabilities = pipeline.predict(df, model_name=model_choice)

        # Simple results; rows are numbered from 1 when no ID column exists
        results = pd.DataFrame()
        results['ID'] = df[id_col].values if id_col else range(1, len(df) + 1)
        results['Label'] = predictions

        # Detailed results
        confidences = probabilities.max(axis=1)
        detailed_results = results.copy()
        detailed_results['Confidence'] = confidences
        for i, class_name in enumerate(pipeline.label_encoder.classes_):
            detailed_results[f'Prob_{class_name}'] = probabilities[:, i]

        # Sample the clock once so the summary and the filename agree.
        now = datetime.now()

        # Summary
        summary = "\n".join([
            "## ✅ Batch Processing Complete",
            "",
            f"**Model**: {model_choice}",
            f"**Samples Processed**: {len(df)}",
            f"**Time**: {now.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "### Class Distribution:",
            "",
        ])
        for class_name in pipeline.label_encoder.classes_:
            count = (predictions == class_name).sum()
            pct = count / len(predictions) * 100
            summary += f"\n- **{class_name}**: {count} ({pct:.1f}%)"

        summary += f"\n\n**Average Confidence**: {confidences.mean():.1%}"

        # Save file in a fresh per-request directory: a second-resolution
        # timestamp in the working directory lets concurrent requests
        # overwrite each other's output. The file name itself is unchanged.
        timestamp = now.strftime('%Y%m%d_%H%M%S')
        output_dir = tempfile.mkdtemp(prefix='exoplanet_predictions_')
        output_file = str(Path(output_dir) / f'predictions_{timestamp}.csv')
        results.to_csv(output_file, index=False)

        return summary, detailed_results, output_file

    except Exception as e:
        return f"❌ Error: {str(e)}", None, None
487
+
488
+
489
def get_template():
    """Write a one-row example CSV and return its path.

    The file has an ``id`` column followed by every entry of BASE_FEATURES;
    the single data row uses id 1 and 0.0 for each feature.

    Returns:
        The file name ``'template_exoplanet.csv'`` (written to the current
        working directory).
    """
    header = ['id'] + BASE_FEATURES
    placeholder_row = [1] + [0.0] * len(BASE_FEATURES)

    template_df = pd.DataFrame(columns=header)
    template_df.loc[0] = placeholder_row

    template_file = 'template_exoplanet.csv'
    template_df.to_csv(template_file, index=False)
    return template_file
496
+
497
+
498
+ # ====================================================================
499
+ # GRADIO UI
500
+ # ====================================================================
501
+
502
+ custom_css = """
503
+ .gradio-container {
504
+ font-family: 'Segoe UI', Arial, sans-serif !important;
505
+ max-width: 1800px !important;
506
+ }
507
+ .gr-button-primary {
508
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
509
+ border: none !important;
510
+ font-weight: 600 !important;
511
+ font-size: 18px !important;
512
+ }
513
+ .gr-button-secondary {
514
+ background: linear-gradient(90deg, #4caf50 0%, #45a049 100%) !important;
515
+ border: none !important;
516
+ color: white !important;
517
+ }
518
+ """
519
+
520
+ with gr.Blocks(css=custom_css, title="🌍 Exoplanet Classifier", theme=gr.themes.Soft()) as app:
521
+
522
+ # Header
523
+ gr.HTML("""
524
+ <div style="text-align: center; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
525
+ padding: 50px; border-radius: 20px; margin-bottom: 30px; box-shadow: 0 10px 30px rgba(0,0,0,0.2);">
526
+ <h1 style="color: white; font-size: 56px; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
527
+ 🌍 TESS Exoplanet Classifier
528
+ </h1>
529
+ <p style="color: rgba(255,255,255,0.95); font-size: 24px; margin: 20px 0 0 0;">
530
+ AI-Powered Classification System for Transiting Exoplanet Survey Satellite Data
531
+ </p>
532
+ <div style="margin-top: 20px; display: flex; justify-content: center; gap: 30px; flex-wrap: wrap;">
533
+ <div style="background: rgba(255,255,255,0.2); padding: 15px 30px; border-radius: 10px;">
534
+ <div style="font-size: 32px; font-weight: bold;">72.2%</div>
535
+ <div style="font-size: 14px; opacity: 0.9;">Ensemble Accuracy</div>
536
+ </div>
537
+ <div style="background: rgba(255,255,255,0.2); padding: 15px 30px; border-radius: 10px;">
538
+ <div style="font-size: 32px; font-weight: bold;">6</div>
539
+ <div style="font-size: 14px; opacity: 0.9;">Target Classes</div>
540
+ </div>
541
+ <div style="background: rgba(255,255,255,0.2); padding: 15px 30px; border-radius: 10px;">
542
+ <div style="font-size: 32px; font-weight: bold;">5</div>
543
+ <div style="font-size: 14px; opacity: 0.9;">ML Models</div>
544
+ </div>
545
+ </div>
546
+ </div>
547
+ """)
548
+
549
+ # Info
550
+ with gr.Accordion("📖 About & Instructions", open=False):
551
+ gr.Markdown("""
552
+ ## 🎯 Classification Classes
553
+
554
+ | Code | Name | Description |
555
+ |------|------|-------------|
556
+ | **APC** | Ambiguous Planet Candidate | Requires further analysis |
557
+ | **CP** | Confirmed Planet | Validated exoplanet |
558
+ | **FA** | False Alarm | Instrumental/systematic effects |
559
+ | **FP** | False Positive | Astrophysical false positive |
560
+ | **KP** | Known Planet | Previously discovered |
561
+ | **PC** | Planet Candidate | High-confidence candidate |
562
+
563
+ ## 📊 Models
564
+
565
+ - **Ensemble**: Weighted combination (72.2% accuracy) - **RECOMMENDED**
566
+ - **XGBoost**: Gradient boosting (72.2%)
567
+ - **LightGBM**: Fast gradient boosting (71.6%)
568
+ - **Random Forest**: Tree ensemble (70.3%)
569
+ - **Extra Trees**: Randomized trees (70.8%)
570
+
571
+ ## 💡 Usage
572
+
573
+ **Single Sample**: Upload a CSV file with exactly 2 rows (header + 1 data row) containing all 52 required feature columns.
574
+ **Batch Processing**: Upload a CSV with all 52 required feature columns for bulk predictions of multiple samples.
575
+ """)
576
+
577
+ # Tabs
578
+ with gr.Tabs():
579
+
580
+ # ===== SINGLE SAMPLE TAB =====
581
+ with gr.Tab("🔬 Single Sample Prediction"):
582
+ gr.HTML("""
583
+ <div style="background: linear-gradient(to right, #e3f2fd, #f5f5f5); padding: 30px;
584
+ border-radius: 15px; border-left: 5px solid #2196F3; margin-bottom: 25px;
585
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
586
+ <h2 style="color: black; margin: 0 0 15px 0; font-size: 28px;">📋 Single Sample CSV Upload</h2>
587
+ <p style="color: black; margin: 0 0 10px 0; font-size: 17px; line-height: 1.7;">
588
+ Upload a CSV file containing <strong style="color: black;">exactly 1 sample</strong> for classification.
589
+ </p>
590
+ <div style="background: rgba(33, 150, 243, 0.1); padding: 15px; border-radius: 8px; margin-top: 15px;">
591
+ <p style="color: black; margin: 0; font-size: 15px;"><strong style="color: black;">📌 Important:</strong></p>
592
+ <ul style="color: black; margin: 10px 0 0 0; padding-left: 20px;">
593
+ <li style="color: black;">File must have <strong style="color: black;">exactly 2 rows</strong>: 1 header row + 1 data row</li>
594
+ <li style="color: black;">Must contain all <strong style="color: black;">52 required feature columns</strong></li>
595
+ <li style="color: black;">Optionally include an ID column (id, toi, or tid) for identification</li>
596
+ <li style="color: black;">Use 0 or NaN for unknown/missing values</li>
597
+ </ul>
598
+ </div>
599
+ </div>
600
+ """)
601
+
602
+ with gr.Row():
603
+ with gr.Column(scale=1):
604
+ gr.HTML("""
605
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
606
+ padding: 20px; border-radius: 12px; margin-bottom: 20px; text-align: center;">
607
+ <h3 style="color: white; margin: 0; font-size: 22px;">⚙️ Configuration</h3>
608
+ </div>
609
+ """)
610
+
611
+ single_file = gr.File(
612
+ label="📁 Upload Single Sample CSV",
613
+ file_types=[".csv"],
614
+ type="filepath"
615
+ )
616
+
617
+ single_model = gr.Dropdown(
618
+ choices=['Ensemble', 'LightGBM', 'XGBoost', 'RandomForest', 'ExtraTrees'],
619
+ value='Ensemble',
620
+ label="🤖 Select Classification Model",
621
+ info="Ensemble combines multiple models (Recommended)"
622
+ )
623
+
624
+ single_predict_btn = gr.Button("🚀 Classify Sample", variant="primary", size="lg")
625
+
626
+ gr.HTML("""
627
+ <div style="background: linear-gradient(to right, #fff9e6, #fffef0);
628
+ border-left: 5px solid #ff9800; padding: 20px; border-radius: 10px; margin-top: 20px;">
629
+ <h4 style="color: black; margin: 0 0 10px 0;">⚡ Quick Requirements:</h4>
630
+ <ul style="color: black; margin: 0; padding-left: 20px; line-height: 2;">
631
+ <li style="color: black;"><strong style="color: black;">Exactly 2 rows</strong> (header + 1 data row)</li>
632
+ <li style="color: black;">All <strong style="color: black;">52 feature columns</strong> required</li>
633
+ <li style="color: black;">Optional ID column for tracking</li>
634
+ </ul>
635
+ </div>
636
+
637
+ <div style="background: #e8f5e9; border-left: 5px solid #4caf50;
638
+ padding: 20px; border-radius: 10px; margin-top: 15px;">
639
+ <h4 style="color: black; margin: 0 0 10px 0;">💡 Need Help?</h4>
640
+ <p style="color: black; margin: 0 0 10px 0; font-size: 14px;">
641
+ Download a properly formatted template CSV with all required columns:
642
+ </p>
643
+ """)
644
+
645
+ single_template_btn = gr.Button("📥 Download Template CSV", variant="secondary")
646
+ single_template_file = gr.File(label="Template File")
647
+
648
+ gr.HTML("</div>")
649
+
650
+ # Feature Requirements Section
651
+ with gr.Accordion("📋 Required Feature Columns (52)", open=False):
652
+ feature_table = """<div style='max-height: 500px; overflow-y: auto; padding: 10px;'>
653
+ <table style='width: 100%; border-collapse: collapse; font-size: 13px;'>
654
+ <thead style='position: sticky; top: 0; background: #667eea; color: white;'>
655
+ <tr>
656
+ <th style='padding: 10px; text-align: left; border: 1px solid #ddd;'>Column Name</th>
657
+ <th style='padding: 10px; text-align: left; border: 1px solid #ddd;'>Parameter</th>
658
+ <th style='padding: 10px; text-align: left; border: 1px solid #ddd;'>Description</th>
659
+ </tr>
660
+ </thead>
661
+ <tbody>"""
662
+
663
+ for i, feat in enumerate(BASE_FEATURES):
664
+ label, desc = PARAM_INFO[feat]
665
+ bg_color = '#f9f9f9' if i % 2 == 0 else 'white'
666
+ feature_table += f"""
667
+ <tr style='background: {bg_color};'>
668
+ <td style='padding: 8px; border: 1px solid #ddd; font-family: monospace; color: #c7254e; background-color: #f9f2f4; font-weight: bold;'>{feat}</td>
669
+ <td style='padding: 8px; border: 1px solid #ddd; font-weight: 600; color: #333;'>{label}</td>
670
+ <td style='padding: 8px; border: 1px solid #ddd; color: #555;'>{desc}</td>
671
+ </tr>"""
672
+
673
+ feature_table += """</tbody></table></div>"""
674
+ gr.HTML(feature_table)
675
+
676
+ with gr.Column(scale=2):
677
+ gr.HTML("""
678
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
679
+ padding: 20px; border-radius: 12px; margin-bottom: 20px;">
680
+ <h2 style="color: white; margin: 0; text-align: center; font-size: 28px;">
681
+ 📊 Classification Results
682
+ </h2>
683
+ </div>
684
+ """)
685
+
686
+ single_result_display = gr.HTML(label="Prediction Result")
687
+ single_prob_display = gr.HTML(label="Class Probabilities")
688
+ single_preview = gr.HTML(label="Input Summary")
689
+
690
+ single_predict_btn.click(
691
+ fn=predict_single,
692
+ inputs=[single_file, single_model],
693
+ outputs=[single_result_display, single_prob_display, single_preview]
694
+ )
695
+
696
+ single_template_btn.click(fn=get_template, outputs=single_template_file)
697
+
698
+ # ===== BATCH TAB =====
699
+ with gr.Tab("📦 Batch Processing"):
700
+ gr.HTML("""
701
+ <div style="background: linear-gradient(to right, #fff3e6, #f5f5f5); padding: 25px;
702
+ border-radius: 15px; border-left: 5px solid #ff9800; margin-bottom: 25px;">
703
+ <h2 style="color: #e65100; margin: 0 0 10px 0;">Bulk CSV Processing</h2>
704
+ <p style="color: #424242; margin: 0; font-size: 16px; line-height: 1.6;">
705
+ Upload a CSV file containing multiple samples. Your file must include all 52 required feature columns.
706
+ Results will be provided in a simple ID-Label format ready for download.
707
+ </p>
708
+ </div>
709
+ """)
710
+
711
+ # Feature Requirements Section for Batch
712
+ with gr.Accordion("📋 Required Feature Columns (52) - Click to View Details", open=False):
713
+ gr.HTML("""
714
+ <div style="background: linear-gradient(to right, #fff3e0, #fffbf0); padding: 20px;
715
+ border-radius: 10px; border-left: 5px solid #ff9800; margin-bottom: 15px;">
716
+ <h3 style="color: #e65100; margin: 0 0 10px 0;">📌 CSV Format Requirements</h3>
717
+ <ul style="color: #5d4037; line-height: 1.8; margin: 0;">
718
+ <li>Your CSV must contain <strong>all 52 required feature columns</strong> listed in the table below</li>
719
+ <li>Column names must <strong>exactly match</strong> the names shown (case-sensitive)</li>
720
+ <li>Optionally include an ID column (<code>id</code>, <code>toi</code>, or <code>tid</code>) to track your samples</li>
721
+ <li>Use <code>0</code> or leave empty for unknown/missing values (will be imputed automatically)</li>
722
+ <li>Each row represents one exoplanet candidate to classify</li>
723
+ <li>Can process multiple samples at once (no row limit)</li>
724
+ </ul>
725
+ </div>
726
+ """)
727
+
728
+ batch_feature_table = """<div style='max-height: 500px; overflow-y: auto; padding: 10px; background: white; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
729
+ <table style='width: 100%; border-collapse: collapse; font-size: 13px;'>
730
+ <thead style='position: sticky; top: 0; background: #ff9800; color: white; z-index: 10;'>
731
+ <tr>
732
+ <th style='padding: 12px; text-align: left; border: 1px solid #ddd;'>Column Name</th>
733
+ <th style='padding: 12px; text-align: left; border: 1px solid #ddd;'>Parameter</th>
734
+ <th style='padding: 12px; text-align: left; border: 1px solid #ddd;'>Description</th>
735
+ </tr>
736
+ </thead>
737
+ <tbody>"""
738
+
739
+ for i, feat in enumerate(BASE_FEATURES):
740
+ label, desc = PARAM_INFO[feat]
741
+ bg_color = '#fff3e0' if i % 2 == 0 else 'white'
742
+ batch_feature_table += f"""
743
+ <tr style='background: {bg_color};'>
744
+ <td style='padding: 8px; border: 1px solid #ddd; font-family: monospace; color: #c7254e; background-color: #f9f2f4; font-weight: bold;'>{feat}</td>
745
+ <td style='padding: 8px; border: 1px solid #ddd; font-weight: 600; color: #333;'>{label}</td>
746
+ <td style='padding: 8px; border: 1px solid #ddd; color: #555;'>{desc}</td>
747
+ </tr>"""
748
+
749
+ batch_feature_table += """</tbody></table></div>"""
750
+ gr.HTML(batch_feature_table)
751
+
752
+ with gr.Row():
753
+ with gr.Column(scale=1):
754
+ batch_file = gr.File(label="📁 Upload CSV File", file_types=[".csv"])
755
+ batch_model = gr.Dropdown(
756
+ choices=['Ensemble', 'LightGBM', 'XGBoost', 'RandomForest', 'ExtraTrees'],
757
+ value='Ensemble',
758
+ label="🤖 Classification Model"
759
+ )
760
+ batch_btn = gr.Button("🚀 Process Batch", variant="primary", size="lg")
761
+
762
+ gr.HTML("""
763
+ <div style="background: #e8f5e9; border-left: 5px solid #4caf50;
764
+ padding: 20px; border-radius: 10px; margin-top: 20px;">
765
+ <h4 style="color: #2e7d32; margin-top: 0;">💡 Download Template</h4>
766
+ <p style="color: #1b5e20; margin-bottom: 15px;">
767
+ Need a template? Click below to download a properly formatted CSV file with all required columns.
768
+ </p>
769
+ """)
770
+ template_btn = gr.Button("📥 Download Template CSV", variant="secondary")
771
+ template_output = gr.File(label="Template File")
772
+ gr.HTML("</div>")
773
+
774
+ with gr.Column(scale=2):
775
+ batch_summary = gr.Markdown()
776
+ batch_results = gr.Dataframe(label="Results Preview")
777
+ batch_download = gr.File(label="📥 Download Results (ID, Label)")
778
+
779
+ batch_btn.click(
780
+ fn=predict_batch,
781
+ inputs=[batch_file, batch_model],
782
+ outputs=[batch_summary, batch_results, batch_download]
783
+ )
784
+
785
+ template_btn.click(fn=get_template, outputs=template_output)
786
+
787
+ # Footer
788
+ gr.HTML("""
789
+ <div style="margin-top: 50px; padding: 30px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
790
+ border-radius: 15px; text-align: center; box-shadow: 0 8px 20px rgba(0,0,0,0.4);">
791
+ <h3 style="color: #e0e0e0; margin: 0 0 15px 0;">🌟 Exoplanet Classification System</h3>
792
+ <p style="color: #b0b0b0; font-size: 15px; margin: 10px 0;">
793
+ Powered by: <strong style="color: #88c0d0;">LightGBM</strong> • <strong style="color: #81a1c1;">XGBoost</strong> • <strong style="color: #5e81ac;">Random Forest</strong> • <strong style="color: #8fbcbb;">Scikit-learn</strong>
794
+ </p>
795
+ <p style="color: #888; font-size: 13px; margin: 10px 0;">
796
+ Data: TESS Mission | Built with Gradio | Version 2025.10.04
797
+ </p>
798
+ <p style="color: #666; font-size: 12px; margin: 10px 0;">
799
+ ⚠️ For research and educational purposes. Validate results independently.
800
+ </p>
801
+ </div>
802
+ """)
803
+
804
+ # ====================================================================
805
+ # LAUNCH
806
+ # ====================================================================
807
+
808
if __name__ == "__main__":
    # Local import: only the script entry point needs it.
    import os

    banner_rule = "=" * 70
    print("\n" + banner_rule)
    print(" " * 20 + "LAUNCHING WEB APPLICATION")
    print(banner_rule + "\n")

    # A public share tunnel is opt-in rather than always on. Hard-coding
    # share=True exposes every local run on a public URL, and the flag is not
    # supported when the app runs on a hosted Space
    # (NOTE(review): this repo looks like a Hugging Face Space; confirm).
    # Set GRADIO_SHARE=1 (or "true"/"yes") to request a share link.
    share_requested = os.environ.get("GRADIO_SHARE", "").strip().lower() in {"1", "true", "yes"}

    app.launch(
        server_name="0.0.0.0",   # listen on all interfaces
        server_port=7860,
        show_error=True,
        share=share_requested,
    )
mutiresult/classification_report_Ensemble.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Classification Report: Ensemble
2
+ ======================================================================
3
+
4
+ precision recall f1-score support
5
+
6
+ APC 0.5000 0.2105 0.2963 19
7
+ CP 0.3500 0.2121 0.2642 33
8
+ FA 0.0000 0.0000 0.0000 1
9
+ FP 0.3333 0.1400 0.1972 50
10
+ KP 0.7143 0.5556 0.6250 54
11
+ PC 0.7638 0.9157 0.8329 332
12
+
13
+ accuracy 0.7198 489
14
+ macro avg 0.4436 0.3390 0.3693 489
15
+ weighted avg 0.6746 0.7198 0.6840 489
mutiresult/classification_report_ExtraTrees.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Classification Report: ExtraTrees
2
+ ======================================================================
3
+
4
+ precision recall f1-score support
5
+
6
+ APC 0.5000 0.0526 0.0952 19
7
+ CP 0.2727 0.0909 0.1364 33
8
+ FA 0.0000 0.0000 0.0000 1
9
+ FP 0.2500 0.0400 0.0690 50
10
+ KP 0.8462 0.4074 0.5500 54
11
+ PC 0.7195 0.9578 0.8217 332
12
+
13
+ accuracy 0.7076 489
14
+ macro avg 0.4314 0.2581 0.2787 489
15
+ weighted avg 0.6453 0.7076 0.6386 489
mutiresult/classification_report_LightGBM.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Classification Report: LightGBM
2
+ ======================================================================
3
+
4
+ precision recall f1-score support
5
+
6
+ APC 0.4167 0.2632 0.3226 19
7
+ CP 0.3214 0.2727 0.2951 33
8
+ FA 0.0000 0.0000 0.0000 1
9
+ FP 0.4000 0.2400 0.3000 50
10
+ KP 0.6000 0.6111 0.6055 54
11
+ PC 0.7995 0.8765 0.8362 332
12
+
13
+ accuracy 0.7157 489
14
+ macro avg 0.4229 0.3773 0.3932 489
15
+ weighted avg 0.6878 0.7157 0.6977 489
mutiresult/classification_report_LogisticRegression.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Classification Report: LogisticRegression
2
+ ======================================================================
3
+
4
+ precision recall f1-score support
5
+
6
+ APC 0.1739 0.6316 0.2727 19
7
+ CP 0.2278 0.5455 0.3214 33
8
+ FA 0.0000 0.0000 0.0000 1
9
+ FP 0.4333 0.5200 0.4727 50
10
+ KP 0.3626 0.6111 0.4552 54
11
+ PC 0.9198 0.5181 0.6628 332
12
+
13
+ accuracy 0.5337 489
14
+ macro avg 0.3529 0.4710 0.3641 489
15
+ weighted avg 0.7310 0.5337 0.5809 489
mutiresult/classification_report_RandomForest.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Classification Report: RandomForest
2
+ ======================================================================
3
+
4
+ precision recall f1-score support
5
+
6
+ APC 0.0000 0.0000 0.0000 19
7
+ CP 0.2222 0.0606 0.0952 33
8
+ FA 0.0000 0.0000 0.0000 1
9
+ FP 0.1429 0.0200 0.0351 50
10
+ KP 0.8462 0.4074 0.5500 54
11
+ PC 0.7152 0.9608 0.8201 332
12
+
13
+ accuracy 0.7035 489
14
+ macro avg 0.3211 0.2415 0.2501 489
15
+ weighted avg 0.6087 0.7035 0.6275 489
mutiresult/classification_report_XGBoost.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Classification Report: XGBoost
2
+ ======================================================================
3
+
4
+ precision recall f1-score support
5
+
6
+ APC 0.5000 0.2105 0.2963 19
7
+ CP 0.3478 0.2424 0.2857 33
8
+ FA 0.0000 0.0000 0.0000 1
9
+ FP 0.3333 0.1400 0.1972 50
10
+ KP 0.7143 0.5556 0.6250 54
11
+ PC 0.7696 0.9157 0.8363 332
12
+
13
+ accuracy 0.7219 489
14
+ macro avg 0.4442 0.3440 0.3734 489
15
+ weighted avg 0.6784 0.7219 0.6878 489
mutiresult/confusion_matrices.png ADDED

Git LFS Details

  • SHA256: 3883dce3a061c521009dc1e082ad53d4b9ad4c30bbf867652bf3acf44054c254
  • Pointer size: 131 Bytes
  • Size of remote file: 593 kB
mutiresult/feature_importance.png ADDED

Git LFS Details

  • SHA256: ef4280ac3cb41d787266341e3efedc516b004e211f9758703b40523d7286e0d2
  • Pointer size: 131 Bytes
  • Size of remote file: 600 kB
mutiresult/feature_info.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "all_features": [
3
+ "ra",
4
+ "dec",
5
+ "st_pmra",
6
+ "st_pmraerr1",
7
+ "st_pmraerr2",
8
+ "st_pmralim",
9
+ "st_pmdec",
10
+ "st_pmdecerr1",
11
+ "st_pmdecerr2",
12
+ "st_pmdeclim",
13
+ "pl_tranmid",
14
+ "pl_tranmiderr1",
15
+ "pl_tranmiderr2",
16
+ "pl_tranmidlim",
17
+ "pl_orbper",
18
+ "pl_orbpererr1",
19
+ "pl_orbpererr2",
20
+ "pl_orbperlim",
21
+ "pl_trandurh",
22
+ "pl_trandurherr1",
23
+ "pl_trandurherr2",
24
+ "pl_trandurhlim",
25
+ "pl_trandep",
26
+ "pl_trandeperr1",
27
+ "pl_trandeperr2",
28
+ "pl_trandeplim",
29
+ "pl_rade",
30
+ "pl_radeerr1",
31
+ "pl_radeerr2",
32
+ "pl_radelim",
33
+ "pl_insol",
34
+ "pl_insolerr1",
35
+ "pl_insolerr2",
36
+ "pl_insollim",
37
+ "pl_eqt",
38
+ "pl_eqterr1",
39
+ "pl_eqterr2",
40
+ "pl_eqtlim",
41
+ "st_tmag",
42
+ "st_tmagerr1",
43
+ "st_tmagerr2",
44
+ "st_tmaglim",
45
+ "st_dist",
46
+ "st_disterr1",
47
+ "st_disterr2",
48
+ "st_distlim",
49
+ "st_teff",
50
+ "st_tefferr1",
51
+ "st_tefferr2",
52
+ "st_tefflim",
53
+ "st_logg",
54
+ "st_loggerr1",
55
+ "st_loggerr2",
56
+ "st_logglim",
57
+ "st_rad",
58
+ "st_raderr1",
59
+ "st_raderr2",
60
+ "st_radlim"
61
+ ],
62
+ "selected_features": [
63
+ "ra",
64
+ "dec",
65
+ "st_pmra",
66
+ "st_pmraerr1",
67
+ "st_pmraerr2",
68
+ "st_pmralim",
69
+ "st_pmdec",
70
+ "st_pmdecerr1",
71
+ "st_pmdecerr2",
72
+ "st_pmdeclim",
73
+ "pl_tranmid",
74
+ "pl_tranmiderr1",
75
+ "pl_tranmiderr2",
76
+ "pl_tranmidlim",
77
+ "pl_orbper",
78
+ "pl_orbpererr1",
79
+ "pl_orbpererr2",
80
+ "pl_trandurh",
81
+ "pl_trandurherr1",
82
+ "pl_trandurherr2",
83
+ "pl_trandep",
84
+ "pl_trandeperr1",
85
+ "pl_trandeperr2",
86
+ "pl_rade",
87
+ "pl_radeerr1",
88
+ "pl_radeerr2",
89
+ "pl_radelim",
90
+ "pl_insol",
91
+ "pl_eqt",
92
+ "st_tmag",
93
+ "st_tmagerr1",
94
+ "st_dist",
95
+ "st_disterr1",
96
+ "st_disterr2",
97
+ "st_tefferr1",
98
+ "st_tefferr2",
99
+ "st_tefflim",
100
+ "st_logg",
101
+ "st_loggerr1",
102
+ "st_loggerr2",
103
+ "st_rad",
104
+ "st_raderr1",
105
+ "st_raderr2",
106
+ "st_radlim",
107
+ "luminosity_proxy",
108
+ "planet_star_radius_ratio",
109
+ "transit_depth_proxy",
110
+ "log_distance",
111
+ "inv_distance",
112
+ "temp_ratio",
113
+ "log_insol",
114
+ "sqrt_insol",
115
+ "log_orbper",
116
+ "st_pmra_error_ratio",
117
+ "st_pmdec_error_ratio",
118
+ "pl_rade_error_ratio",
119
+ "feature_mean",
120
+ "feature_std",
121
+ "feature_max",
122
+ "feature_min"
123
+ ],
124
+ "n_features": 58,
125
+ "n_selected": 60
126
+ }
mutiresult/feature_scores.csv ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature,score
2
+ pl_tranmid,0.10058388860470635
3
+ pl_trandurherr2,0.0943360041537582
4
+ pl_trandurherr1,0.09260907503584814
5
+ st_tmag,0.0773120734401902
6
+ pl_rade_error_ratio,0.0746345642044055
7
+ pl_tranmiderr1,0.07072746195404522
8
+ pl_tranmiderr2,0.06939452988563999
9
+ pl_trandeperr2,0.06708691436040959
10
+ pl_trandeperr1,0.06708616815245194
11
+ st_rad,0.0653051637978963
12
+ st_loggerr1,0.060289975854914246
13
+ log_orbper,0.05710053100824575
14
+ pl_orbper,0.05673082307809074
15
+ st_loggerr2,0.055891289285257884
16
+ st_tefferr1,0.054497754817492305
17
+ st_dist,0.054088027146716566
18
+ inv_distance,0.053659444012782664
19
+ log_distance,0.05353421359003718
20
+ st_tefferr2,0.05252489140575456
21
+ pl_trandurh,0.050686071123912546
22
+ st_pmraerr2,0.047570679656092185
23
+ dec,0.04553914467762987
24
+ st_raderr2,0.038896264430238414
25
+ st_pmdecerr2,0.03888845727031187
26
+ st_disterr1,0.03736688469809901
27
+ st_disterr2,0.03631174509371071
28
+ pl_orbpererr1,0.03599140473565621
29
+ pl_rade,0.0359609738241089
30
+ pl_insol,0.034403756845682976
31
+ sqrt_insol,0.03417849794237493
32
+ log_insol,0.03405330470849344
33
+ st_raderr1,0.03337667004872613
34
+ pl_radeerr1,0.029370008341174758
35
+ pl_radeerr2,0.029187264207060748
36
+ st_pmdecerr1,0.0290700400755326
37
+ temp_ratio,0.028656777472438222
38
+ pl_orbpererr2,0.02803107120731263
39
+ st_pmraerr1,0.02736478939231235
40
+ st_logg,0.027120721911320533
41
+ feature_min,0.027058757264905253
42
+ pl_trandep,0.025588020080648866
43
+ pl_eqt,0.02416596292758255
44
+ planet_star_radius_ratio,0.018561314834198983
45
+ transit_depth_proxy,0.018370615691842174
46
+ feature_max,0.01766602864717748
47
+ feature_std,0.017552613001937445
48
+ feature_mean,0.01718836978668059
49
+ st_pmdec,0.014548337924330523
50
+ ra,0.014342613726746167
51
+ pl_radelim,0.014038127813360113
52
+ st_pmra,0.010297755012506737
53
+ st_pmra_error_ratio,0.008262326729144398
54
+ luminosity_proxy,0.0078110578935157715
55
+ st_pmdeclim,0.006311307886967432
56
+ st_tefflim,0.005229401126498834
57
+ pl_tranmidlim,0.004476832142852993
58
+ st_tmagerr1,0.0036663497865565553
59
+ st_pmralim,0.003033599214105731
60
+ st_radlim,0.0030230088374789688
61
+ st_pmdec_error_ratio,0.0022816723367815595
62
+ pl_trandurhlim,0.0007805835305294551
63
+ st_tmaglim,0.00019178201976965958
64
+ pl_orbperlim,0.0
65
+ pl_trandeplim,0.0
66
+ st_tmagerr2,0.0
67
+ st_logglim,0.0
68
+ st_distlim,0.0
69
+ st_teff,0.0
mutiresult/imputer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f23451bb545bd698bf5cb04f912f0b4e671e9d81bb9b0d652c6fd1ac264e67ed
3
+ size 3606665
mutiresult/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0034712499e29d1d1afdeac2af29869ef79da2b941af6d7807f6edbde92cd20d
3
+ size 504
mutiresult/learning_curves.png ADDED

Git LFS Details

  • SHA256: d2a6d016eac07d46465abf714a71f5bf5530c5aae82bc8b7a141dd921972c998
  • Pointer size: 131 Bytes
  • Size of remote file: 299 kB
mutiresult/metrics_summary.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "LightGBM": {
3
+ "Accuracy": 0.7157464212678937,
4
+ "Precision": 0.6878147074466093,
5
+ "Recall": 0.7157464212678937,
6
+ "F1-Score": 0.6977191708337254,
7
+ "MCC": 0.40092736912599086,
8
+ "Cohen_Kappa": 0.3966231990838963,
9
+ "Log_Loss": 0.8667761106577778,
10
+ "AUC_OVR": 0.8349458443198322
11
+ },
12
+ "XGBoost": {
13
+ "Accuracy": 0.721881390593047,
14
+ "Precision": 0.6783850309985548,
15
+ "Recall": 0.721881390593047,
16
+ "F1-Score": 0.6877780716424755,
17
+ "MCC": 0.37628533150933247,
18
+ "Cohen_Kappa": 0.35900994679620624,
19
+ "Log_Loss": 0.8242094430417346,
20
+ "AUC_OVR": 0.8421016022429259
21
+ },
22
+ "RandomForest": {
23
+ "Accuracy": 0.7034764826175869,
24
+ "Precision": 0.6086510870686501,
25
+ "Recall": 0.7034764826175869,
26
+ "F1-Score": 0.6275139315266048,
27
+ "MCC": 0.2606656979570372,
28
+ "Cohen_Kappa": 0.20312658042909004,
29
+ "Log_Loss": 0.7921825120152793,
30
+ "AUC_OVR": 0.8300185572722183
31
+ },
32
+ "ExtraTrees": {
33
+ "Accuracy": 0.7075664621676891,
34
+ "Precision": 0.6453006875237117,
35
+ "Recall": 0.7075664621676891,
36
+ "F1-Score": 0.6385766896155018,
37
+ "MCC": 0.2798313236799552,
38
+ "Cohen_Kappa": 0.2245153706250277,
39
+ "Log_Loss": 0.7800578509848219,
40
+ "AUC_OVR": 0.8283101428969134
41
+ },
42
+ "LogisticRegression": {
43
+ "Accuracy": 0.5337423312883436,
44
+ "Precision": 0.7309640150800872,
45
+ "Recall": 0.5337423312883436,
46
+ "F1-Score": 0.5808969016846894,
47
+ "MCC": 0.361136062774439,
48
+ "Cohen_Kappa": 0.32511712933257464,
49
+ "Log_Loss": 1.2702846778722883,
50
+ "AUC_OVR": 0.794391056021819
51
+ },
52
+ "Ensemble": {
53
+ "Accuracy": 0.7198364008179959,
54
+ "Precision": 0.6745931184797294,
55
+ "Recall": 0.7198364008179959,
56
+ "F1-Score": 0.6839894359599863,
57
+ "MCC": 0.3674686647252085,
58
+ "Cohen_Kappa": 0.3486655971999416,
59
+ "Log_Loss": 0.7483648859389048,
60
+ "AUC_OVR": 0.8429211659418736
61
+ }
62
+ }
mutiresult/model_comparison.png ADDED

Git LFS Details

  • SHA256: 00e96640cce9d901bad6febace1805115a694f06a54889b209aa399f9a7a2643
  • Pointer size: 131 Bytes
  • Size of remote file: 472 kB
mutiresult/model_extratrees.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:790de4aa3a17d30ac083800263bc9d85b1e6273aba763304f336890e76b70f23
3
+ size 84380265
mutiresult/model_lightgbm.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e02771e8251b13456b2d3dc9d089d9a344de9bfae17c6e1137fc2935dcfd8e47
3
+ size 5715044
mutiresult/model_logisticregression.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ca452867d013f5f0f67e74c041eac577b1dd12a79138caa62375acf06621b9e
3
+ size 4935
mutiresult/model_randomforest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80958dad24deb019fcbb3e831a123d8767b64ce926dfa19919e675c50c558680
3
+ size 50616470
mutiresult/model_xgboost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047dd9e61e19370d607592f79b8730ecfaf7096f55c3295c945d806305ac5520
3
+ size 7511049
mutiresult/prediction_distribution.png ADDED

Git LFS Details

  • SHA256: d8f00207676298308324a5693ef39649fbcd45164961f8ca8e3500ba9f5aed80
  • Pointer size: 131 Bytes
  • Size of remote file: 315 kB
mutiresult/roc_curves_ensemble.png ADDED

Git LFS Details

  • SHA256: 06acafdf4eaddedcf65cd83421c84a8973418ac1f10372b72b5a9dff9e88e1ee
  • Pointer size: 131 Bytes
  • Size of remote file: 282 kB
mutiresult/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0eca4f5252a5351a74a7c90f7393c3fe71385c235b2ccb8b0362d70e661ae8a
3
+ size 2807
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio==4.44.0
2
+ pandas==2.2.3
3
+ numpy==1.26.4
4
+ scikit-learn==1.5.2
5
+ joblib==1.4.2
6
+ lightgbm==4.5.0
7
+ xgboost==2.1.1