Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,147 +1,297 @@
|
|
|
|
|
| 1 |
import io
|
| 2 |
-
import
|
| 3 |
-
|
|
|
|
| 4 |
|
| 5 |
-
import
|
|
|
|
| 6 |
import panel as pn
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
|
|
|
| 11 |
|
| 12 |
-
ICON_URLS = {
|
| 13 |
-
"brand-github": "https://github.com/holoviz/panel",
|
| 14 |
-
"brand-twitter": "https://twitter.com/Panel_Org",
|
| 15 |
-
"brand-linkedin": "https://www.linkedin.com/company/panel-org",
|
| 16 |
-
"message-circle": "https://discourse.holoviz.org/",
|
| 17 |
-
"brand-discord": "https://discord.gg/AXRHnJU6sP",
|
| 18 |
-
}
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
api_url = f"https://api.the{pet}api.com/v1/images/search"
|
| 24 |
-
async with aiohttp.ClientSession() as session:
|
| 25 |
-
async with session.get(api_url) as resp:
|
| 26 |
-
return (await resp.json())[0]["url"]
|
| 27 |
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
def load_processor_model(
|
| 31 |
-
processor_name: str, model_name: str
|
| 32 |
-
) -> Tuple[CLIPProcessor, CLIPModel]:
|
| 33 |
-
processor = CLIPProcessor.from_pretrained(processor_name)
|
| 34 |
-
model = CLIPModel.from_pretrained(model_name)
|
| 35 |
-
return processor, model
|
| 36 |
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
async with session.get(image_url) as resp:
|
| 41 |
-
return Image.open(io.BytesIO(await resp.read()))
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
)
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
outputs = model(**inputs)
|
| 54 |
-
logits_per_image = outputs.logits_per_image
|
| 55 |
-
class_likelihoods = logits_per_image.softmax(dim=1).detach().numpy()
|
| 56 |
-
return class_likelihoods[0]
|
| 57 |
|
| 58 |
|
| 59 |
-
|
| 60 |
-
"""
|
| 61 |
-
High level function that takes in the user inputs and returns the
|
| 62 |
-
classification results as panel objects.
|
| 63 |
-
"""
|
| 64 |
try:
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
try:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
for class_item, class_likelihood in zip(class_items, class_likelihoods):
|
| 85 |
-
row_label = pn.widgets.StaticText(
|
| 86 |
-
name=class_item.strip(), value=f"{class_likelihood:.2%}", align="center"
|
| 87 |
-
)
|
| 88 |
-
row_bar = pn.indicators.Progress(
|
| 89 |
-
value=int(class_likelihood * 100),
|
| 90 |
-
sizing_mode="stretch_width",
|
| 91 |
-
bar_color="secondary",
|
| 92 |
-
margin=(0, 10),
|
| 93 |
-
design=pn.theme.Material,
|
| 94 |
-
)
|
| 95 |
-
results.append(pn.Column(row_label, row_bar))
|
| 96 |
-
yield results
|
| 97 |
-
finally:
|
| 98 |
-
main.disabled = False
|
| 99 |
|
| 100 |
|
| 101 |
-
# create widgets
|
| 102 |
-
randomize_url = pn.widgets.Button(name="Randomize URL", align="end")
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
value
|
| 112 |
-
)
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
)
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
# add footer
|
| 127 |
-
footer_row = pn.Row(pn.Spacer(), align="center")
|
| 128 |
-
for icon, url in ICON_URLS.items():
|
| 129 |
-
href_button = pn.widgets.Button(icon=icon, width=35, height=35)
|
| 130 |
-
href_button.js_on_click(code=f"window.open('{url}')")
|
| 131 |
-
footer_row.append(href_button)
|
| 132 |
-
footer_row.append(pn.Spacer())
|
| 133 |
-
|
| 134 |
-
# create dashboard
|
| 135 |
-
main = pn.WidgetBox(
|
| 136 |
-
input_widgets,
|
| 137 |
-
interactive_result,
|
| 138 |
-
footer_row,
|
| 139 |
-
)
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
import io
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
import pandas as pd
|
| 6 |
|
| 7 |
+
import gc  # garbage collector
|
| 8 |
+
from io import BytesIO
|
| 9 |
import panel as pn
|
| 10 |
+
import holoviews as hv
|
| 11 |
+
import hvplot.pandas
|
| 12 |
+
from warnings import filterwarnings
|
| 13 |
+
'''
|
| 14 |
+
development env: panel serve script.py --autoreload
|
| 15 |
+
prod prep: panel convert script.py --to pyodide-worker --out pyodide
|
| 16 |
+
'''
|
| 17 |
|
| 18 |
+
filterwarnings("ignore")
|
| 19 |
+
hv.extension('bokeh')
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
# Sidebar intro/instructions markdown (typos fixed: "pouplations" -> "populations",
# "Wait few" -> "Wait a few").
text = """
# Feature Distribution and Stats
## AUTHOR: [`FIRAS ALI OBEID`](https://www.linkedin.com/in/feras-obeid/)
### GNU General Public License v3.0 (GPL-3.0)
#### Developed while working at [OppFi Inc.](https://www.oppfi.com/)

This tool performs feature binning by equal intervals and by equal populations in each interval vs bad rate/target binary variable
To get the feature deep dive feature distribution:

1. Upload a CSV (only numerical data)

2. Choose & press on the binary (0 / 1) target column in the `Select Target Variable` section below

3. Press Run Analysis

4. Wait a few seconds and analyze the updated charts
"""
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
# --- Input widgets ---------------------------------------------------------
file_input = pn.widgets.FileInput(align='center')  # CSV upload (bytes in .value)
selector = pn.widgets.MultiSelect(name='Select Target Variable')  # pick the binary target column
button = pn.widgets.Button(name='Run Analysis')  # each click re-triggers run() via button.param.clicks
# Sidebar box: intro markdown, upload hint, then the three controls.
widgets = pn.WidgetBox(
    pn.panel(text, margin=(0, 10)),
    pn.panel('Upload a CSV containing (X) features and (y) binary variable:', margin=(0, 10)),
    file_input,
    selector,
    button
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
+
def closest(lst, K):
    """Return the element of ``lst`` nearest to ``K``.

    Falls back to ``K`` itself when ``lst`` is empty or non-numeric
    (used as a safe upper clip bound in the outlier-capping step).
    """
    try:
        return lst[min(range(len(lst)), key=lambda i: abs(lst[i] - K))]
    except (ValueError, TypeError, IndexError):
        # ValueError: min() over an empty range (lst empty);
        # TypeError: non-numeric elements; IndexError: defensive.
        # Narrowed from a bare `except:` that also swallowed KeyboardInterrupt.
        return K
|
| 57 |
+
def control_max(x):
    """Upper bin edge slightly beyond x.max() so the max value falls inside
    pd.interval_range (1% pad; +0.01 when the max is exactly zero).
    Converted from an assigned lambda (PEP 8 / E731) — same values returned.
    """
    m = x.max()
    if m > 0:
        return m * 1.01
    if m < 0:
        return m * 0.99
    return m + 0.01


def control_min(x):
    """Lower bin edge slightly below x.min() (mirror of control_max)."""
    m = x.min()
    if m > 0:
        return m * 0.99
    if m < 0:
        return m * 1.01
    return m - 0.01
|
| 59 |
+
|
| 60 |
+
def get_data():
    """Return ``(target, New_Refit_routing)`` and set both as module globals.

    With no upload: a 100k-row synthetic demo frame with target "default".
    With an upload: the parsed CSV (numeric-coerced where possible) and
    ``target=None`` so run() uses the selector's choice instead.
    Non-numeric / datetime columns are dropped; +/-inf becomes NaN.
    """
    global target, New_Refit_routing
    if file_input.value is None:
        # np.random.randint(0, 2, n) replaces np.random.random_integers(0, 1, n),
        # which was deprecated in NumPy 1.11 and removed in recent releases.
        New_Refit_routing = pd.DataFrame({
            "Open_accounts": np.random.randint(1, 50, 100000),
            "Income": np.random.randint(1000, 20000, 100000),
            "Years_of_experience": np.random.randint(0, 20, 100000),
            "default": np.random.randint(0, 2, 100000),
        })
        target = "default"
    else:
        buffer = BytesIO(file_input.value)
        try:
            # on_bad_lines='skip' replaces error_bad_lines=False,
            # which was removed in pandas 2.0.
            New_Refit_routing = pd.read_csv(buffer, on_bad_lines='skip').apply(
                pd.to_numeric, errors='ignore')  # .set_index("id")
        except Exception:
            # Rewind before retrying: the first read_csv consumed the buffer
            # (the original retried on an exhausted stream).
            buffer.seek(0)
            New_Refit_routing = pd.read_csv(buffer, on_bad_lines='skip')
        target = None
        New_Refit_routing = New_Refit_routing.select_dtypes(
            exclude=['datetime', "category", "object"])
    New_Refit_routing = New_Refit_routing.replace([np.inf, -np.inf], np.nan)
    # New_Refit_routing = New_Refit_routing[[cols for cols in New_Refit_routing.columns if New_Refit_routing[cols].nunique() >= 2]] #remove columns with less then 2 unique values
    return target, New_Refit_routing
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def update_target(event):
    """Refresh the target-variable selector whenever a file is uploaded.

    ``event`` is the param watcher event (unused; also called with None once
    at startup to seed the selector from the demo data).
    """
    _, New_Refit_routing = get_data()
    target = list(New_Refit_routing.columns)
    # .param.update(...) replaces the deprecated Parameterized.set_param API
    # (removed in param 2.x); sets both options and value in one batch.
    selector.param.update(options=target, value=target)

# Re-populate the selector on every upload, and once at import time.
file_input.param.watch(update_target, 'value')
update_target(None)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def stats_():
    """Build the global ``stats`` table: describe() extended with 1/5/95/99
    percentiles and a missing-value percentage, rounded to 4 decimals and
    stringified for display. Reads the global ``New_Refit_routing``.
    """
    global stats
    summary = New_Refit_routing.describe().T
    summary["Missing_Values(%)"] = (New_Refit_routing.isna().sum() / len(New_Refit_routing)) * 100
    tail_quantiles = (
        New_Refit_routing.quantile(q=[.01, .05, .95, .99])
        .T
        .rename(columns={0.01: '1%', 0.05: '5%', 0.95: '95%', 0.99: '99%'})
    )
    summary = pd.concat([summary, tail_quantiles], axis=1)
    column_order = ['count', 'mean', 'std', 'min', '1%', '5%', '25%', '50%',
                    '75%', '95%', '99%', 'max', 'Missing_Values(%)']
    stats = summary[column_order].round(4).astype(str)
|
| 100 |
+
|
| 101 |
+
def cuts_(target):
    """Equal-width binning: split each numeric feature into 10 equal intervals,
    then compute per bin the population share (``test``) and the mean of the
    binary target, i.e. the bad rate (``test2``); merge both into ``final_df``.
    Publishes results via module-level globals; reads global New_Refit_routing.
    """
    global test, test2, final_df , outlier_removed_stats
    df = New_Refit_routing.copy()
    neglect = [target] + [cols for cols in df.columns if df[cols].nunique() <= 2] #remove binary and target variable
    cols = df.columns.difference(neglect) # Getting all columns except the ones in []

    #REMOVE OUTIERS#
    # Cap each feature at its 1st percentile below and, above, at the largest
    # observed value still under the 99th percentile (found via closest()).
    df[cols] = df[cols].apply(lambda col: col.clip(lower = col.quantile(.01),
                                                   upper = closest(col[col < col.quantile(.99)].dropna().values,
                                                                   col.quantile(.99))),axis = 0)

    # Stats after capping; features with mean==min==max became constant under
    # clipping and are excluded from binning below.
    outlier_removed_stats = df.describe().T
    remove_feature = list(outlier_removed_stats[(outlier_removed_stats["mean"]==outlier_removed_stats["max"]) &
                                                (outlier_removed_stats["mean"]==outlier_removed_stats["min"])].index)
    outlier_removed_stats = pd.concat([outlier_removed_stats, df.quantile(q = [.01, .05, .95, .99]).T.rename(columns = {0.01: '1%', 0.05: '5%', 0.95: '95%', 0.99:'99%'})], axis = 1)
    outlier_removed_stats = outlier_removed_stats[['count', 'mean', 'std', 'min', '1%', '5%' ,'25%', '50%', '75%', '95%', '99%', 'max']]
    outlier_removed_stats = outlier_removed_stats.round(4).astype(str)

    neglect += remove_feature
    cols = df.columns.difference(neglect) # Getting all columns except the ones in []

    # 10 equal-width intervals per feature; control_min/control_max pad the
    # range so min and max fall strictly inside the outer bins. NaNs get their
    # own "Missing_<col>" category.
    df[cols] = df[cols].apply(lambda col: pd.cut(col.fillna(np.nan),
                              bins = pd.interval_range(start=float(np.apply_along_axis(control_min , 0,col.dropna())), end = float(np.apply_along_axis(control_max , 0,col.dropna())),
                              periods = 10), include_lowest=True).cat.add_categories(pd.Categorical(f"Missing_{col.name}")).fillna(f"Missing_{col.name}"), axis=0)

    # Per-bin population share, melted to long format: (interval, feature, pct).
    test = pd.concat([df[cols].value_counts(normalize = True) for cols in df[cols]], axis = 1)
    cols = test.columns
    test = test.reset_index().melt(id_vars="index",
                                   var_name='column',
                                   value_name='value').dropna().reset_index(drop = True)

    test = test.rename(columns={"index":"IntervalCuts", "column":"feature", "value":"Count_Pct"})
    test.Count_Pct = test.Count_Pct.round(4)
    test.IntervalCuts = test.IntervalCuts.astype(str)
    # Round interval endpoints to 4 decimals for display; "Missing_*" labels
    # (non-digit first char after stripping "(" and "-") pass through unchanged.
    test.IntervalCuts = test.IntervalCuts.apply(lambda x: "("+str(round(float(x.split(",")[0].strip("(")),4)) +', ' + str(round(float(x.split(",")[-1].strip("]")),4)) +"]" if (x.split(",")[0].strip("(").strip("-")[0]).isdigit() else x)

    # Per-bin bad rate: mean of the binary target within each interval.
    test2 = pd.concat([df.groupby(col)[target].mean().fillna(0) for col in df[cols]], axis = 1)
    test2.columns = cols
    test2 = test2.reset_index().melt(id_vars="index", var_name='column', value_name='value').dropna().reset_index(drop = True)
    test2 = test2.rename(columns={"index":"IntervalCuts", "column":"feature", "value":"Bad_Rate_Pct"})
    test2.Bad_Rate_Pct = test2.Bad_Rate_Pct.round(4)
    test2.IntervalCuts = test2.IntervalCuts.astype(str)
    test2.IntervalCuts = test2.IntervalCuts.apply(lambda x: "("+str(round(float(x.split(",")[0].strip("(")),4)) +', ' + str(round(float(x.split(",")[-1].strip("]")),4)) +"]" if (x.split(",")[0].strip("(").strip("-")[0]).isdigit() else x)

    # Join the count and bad-rate tables on a "feature_interval" key.
    test["index"] = test["feature"] + "_" + test["IntervalCuts"]
    test = test.set_index("index").sort_index()
    test2["index"] = test2["feature"] + "_" + test2["IntervalCuts"]
    test2 = test2.set_index("index").sort_index()
    final_df = pd.merge(test2, test[test.columns.difference(test2.columns)], on = "index")
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
## QCUT ##
def qcuts_(target):
    """Equal-population binning (deciles): same outputs as cuts_() but with
    pd.qcut, so every bin holds ~10% of the rows. Publishes ``test_q``,
    ``test2_q`` and ``final_df_q`` as globals; reads global New_Refit_routing.
    """
    global test_q, test2_q, final_df_q
    df2 = New_Refit_routing.copy()
    neglect = [target] + [cols for cols in df2.columns if df2[cols].nunique() <= 2] #remove binary and target variable
    cols = df2.columns.difference(neglect) # Getting all columns except the ones in []

    #DEBUGGING CODE#####################################################################################
    # for i in df2[cols].columns:
    #     print(i)
    #     print(df2[i][df2[i] < df2[i].quantile(.99)].dropna().values)
    #     print(df2[i].quantile(.99))
    #     print(closest(df2[i][df2[i] < df2[i].quantile(.99)].dropna().values, df2[i].quantile(.99)))
    # df2.apply(lambda col: col.clip(lower = col.quantile(.01),
    #                                upper = closest(col[col < col.quantile(.99)].dropna().values,
    #                                                col.quantile(.99))),axis = 0)

    ####################################################################################################
    #REMOVE OUTIERS#
    # Same 1%/99% capping as in cuts_().
    df2[cols] = df2[cols].apply(lambda col: col.clip(lower = col.quantile(.01),
                                                     upper = closest(col[col < col.quantile(.99)].dropna().values,
                                                                     col.quantile(.99))),axis = 0)

    # Drop features that became constant after capping (mean==min==max).
    temp = df2.describe().T
    remove_feature = list(temp[(temp["mean"]==temp["max"]) &
                               (temp["mean"]==temp["min"])].index)

    neglect+= remove_feature
    cols = df2.columns.difference(neglect) # Getting all columns except the ones in []
    # rank(method='first') is a must in qcut
    # df2[cols] = df2[cols].apply(lambda col: pd.qcut(col.fillna(np.nan).rank(method='first'),
    #                             q = 10, duplicates = "drop").cat.add_categories(pd.Categorical(f"Qcut_Missing_{col.name}")).fillna(f"Qcut_Missing_{col.name}"), axis=0)
    # Decile-bin each feature on its rank, then relabel every decile as
    # "<decile>_(<min>,<max>]" using the actual value range inside that decile;
    # NaNs map to "Qcut_Missing_<col>". The inner concat/groupby builds the
    # decile-label -> display-string mapping consumed by .replace().
    df2[cols] = df2[cols].apply(lambda col: pd.qcut(col.fillna(np.nan).rank(method='first'),q = 10, labels=range(1,11)).cat.rename_categories({10:"Last"}).astype(str).replace(dict(dict(pd.concat([col,
                                pd.qcut(col.fillna(np.nan).rank(method='first'),q = 10, labels=range(1,11)).cat.rename_categories({10:"Last"})
                                .apply(str)], axis = 1, keys= ["feature", "qcuts"]).groupby("qcuts").agg([min, max]).reset_index().astype(str).set_index("qcuts",drop = False)
                                .apply(lambda x :x[0]+"_"+"("+str(round(float(x[1]),2))+","+str(round(float(x[2]),2))+"]",axis = 1)),**{"nan":f"Qcut_Missing_{col.name}"})), axis=0)

    # Per-decile population share in long format (≈0.1 per decile by design).
    test_q = pd.concat([df2[cols].value_counts(normalize = True) for cols in df2[cols]], axis = 1)
    cols = test_q.columns
    test_q = test_q.reset_index().melt(id_vars="index",
                                       var_name='column',
                                       value_name='value').dropna().reset_index(drop = True)

    test_q = test_q.rename(columns={"index":"IntervalCuts", "column":"feature", "value":"Count_Pct"})
    test_q.Count_Pct = test_q.Count_Pct.round(4)
    test_q.IntervalCuts = test_q.IntervalCuts.astype(str)
    # test_q.IntervalCuts = test_q.IntervalCuts.apply(lambda x: "("+str(round(float(x.split(",")[0].strip("(")),4)) +', ' + str(round(float(x.split(",")[-1].strip("]")),4)) +"]" if (x.split(",")[0].strip("(")[0]).isdigit() else x)

    # Per-decile bad rate: mean of the binary target within each decile.
    test2_q = pd.concat([df2.groupby(col)[target].mean().fillna(0) for col in df2[cols]], axis = 1)
    test2_q.columns = cols
    test2_q = test2_q.reset_index().melt(id_vars="index", var_name='column', value_name='value').dropna().reset_index(drop = True)
    test2_q = test2_q.rename(columns={"index":"IntervalCuts", "column":"feature", "value":"Bad_Rate_Pct"})
    test2_q.Bad_Rate_Pct = test2_q.Bad_Rate_Pct.round(4)
    test2_q.IntervalCuts = test2_q.IntervalCuts.astype(str)
    # test2_q.IntervalCuts = test2_q.IntervalCuts.apply(lambda x: "("+str(round(float(x.split(",")[0].strip("(")),4)) +', ' + str(round(float(x.split(",")[-1].strip("]")),4)) +"]" if (x.split(",")[0].strip("(")[0]).isdigit() else x)

    # Join count and bad-rate tables on "feature_decile" keys.
    test_q["index"] = test_q["feature"] + "_" + test_q["IntervalCuts"]
    test_q = test_q.set_index("index").sort_index()
    test2_q["index"] = test2_q["feature"] + "_" + test2_q["IntervalCuts"]
    test2_q = test2_q.set_index("index").sort_index()
    final_df_q = pd.merge(test2_q, test_q[test_q.columns.difference(test2_q.columns)], on = "index")
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
|
|
|
|
|
|
|
| 223 |
|
| 224 |
+
@pn.depends(button.param.clicks)
def run(_):
    """Reactive dashboard builder: re-runs the full analysis on every
    'Run Analysis' click and returns the assembled panel layout.
    """
    target, New_Refit_routing = get_data()
    # get_data() returns target=None when a CSV was uploaded -> use the
    # user's selection; otherwise fall back to the demo "default" column.
    if target == None:
        target = str(selector.value[0])
    else:
        target = "default"
    print(str(selector.value[0]))
    print(target)
    # print(type(file_input.value))
    # print(type(New_Refit_routing))
    print(New_Refit_routing.head())

    # Populate the global result tables (stats, test/test2/final_df,
    # test_q/test2_q/final_df_q) consumed by the plots below.
    stats_()
    cuts_(target)
    qcuts_(target)
    # Equal-interval charts: bad-rate scatter overlaid on count bars.
    test2_plot = test2.set_index("IntervalCuts").hvplot.scatter(yaxis = "left", y = "Bad_Rate_Pct",
                    groupby = "feature", xlabel = "Intervals(Bins)", ylabel = "%Count vs %BadRate",height = 500,
                    width = 1000, title = "Features Segments Cuts by Count", legend = True,label = "Bad Rate(%)").opts(xrotation=45, yformatter = "%.04f",show_grid=True,
                    framewise=True, color = "red", legend_position='top_right')
    test_plot = test.set_index("IntervalCuts").hvplot.bar(y = "Count_Pct",
                    groupby = "feature", xlabel = "Intervals(Bins)", ylabel = "%Count vs %BadRate",height = 500,
                    width = 1000, title = "Features Segments Cuts by Count", legend=True, alpha=0.3, label ="Equal Intervals Data Points(%)").opts(xrotation=45, yformatter = "%.04f",show_grid=True, framewise=True, yaxis='left')
    final_table = final_df.hvplot.table(groupby = "feature", width=400)

    # Equal-population (qcut) counterparts of the charts above.
    test2_plot_q = test2_q.set_index("IntervalCuts").hvplot.scatter(yaxis = "left", y = "Bad_Rate_Pct",
                    groupby = "feature", xlabel = "Intervals(Bins)", ylabel = "%Count vs %BadRate",height = 500,
                    width = 1000, title = "Features Segments Q_Cuts by Count", legend = True).opts(xrotation=45, yformatter = "%.04f",show_grid=True,
                    framewise=True, color = "red")
    test_plot_q = test_q.set_index("IntervalCuts").hvplot.bar(y = "Count_Pct",
                    groupby = "feature", xlabel = "Intervals(Bins)", ylabel = "%Count vs %BadRate",height = 500,
                    width = 1000, title = "Features Segments Q_Cuts by Count", legend=True, alpha=0.3, label ="Equal Population Data Points(%)").opts(xrotation=45, yformatter = "%.04f",show_grid=True, framewise=True, yaxis='left')
    final_table_q = final_df_q.hvplot.table(groupby = "feature", width=400)

    # Summary-statistics tables: raw data vs outlier-capped data.
    stats_table = stats.reset_index().hvplot.table(width = 1000,title="Summary Statistics of the Data", hover = True, responsive=True,
                    shared_axes= False, fit_columns = True,
                    padding=True, height=500, index_position = 0, fontscale = 1.5)
    stats_table_no_outliers = outlier_removed_stats.reset_index().hvplot.table(width = 1000,title="Summary Statistics of the Capped Outliers Data", hover = True, responsive=True,
                    shared_axes= False, fit_columns = True,
                    padding=True, height=500, index_position = 0, fontscale = 1.5)
    #PANEL
    # Template chrome; NOTE(review): calling pn.extension inside the callback
    # re-applies on every click — presumably intentional for pyodide convert.
    pn.extension( template="fast")
    pn.state.template.param.update(
        # site_url="",
        site="CreditRisk",
        title="Feature Distribution & Statistics",
        # favicon="https://raw.githubusercontent.com/opploans/DS_modelling_tools/main/docs/Resources/favicon.ico?token=GHSAT0AAAAAABYR5F6VDZ2PU33UY6NN7NQEY3C2ASA"
        # favicon="",
    )

    title = pn.pane.Markdown(
        """
        ### Feature Distribution (Bin Count & Bad Rate)
        """,
        width=800,
    )

    # Overlay the four charts (*), append the two detail tables (+),
    # then the two stats tables, arranged in 3 and 2 columns.
    return pn.Column(
        title,
        (test2_plot * test_plot * test2_plot_q * test_plot_q + (final_table + final_table_q)).cols(3),
        (stats_table + stats_table_no_outliers).cols(2),
    )
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
# Footer links to the author's other projects (markdown, rendered in sidebar).
profiles = '''
### Other Web Apps:

* [Twitter Sentiment Analysis Flask App](https://firobeid.pythonanywhere.com/)

* [Personal Lectures @ UCBerkley Using Panel App](https://firobeid.github.io/compose-plots/script.html)
'''
# Top-level layout served by `panel serve`: sidebar (widgets + links),
# a spacer, and the reactive run() output bound to the button clicks.
pn.Row(pn.Column(widgets, profiles), pn.layout.Spacer(width=20), run).servable(target='main')
|