Spaces:
Configuration error
Configuration error
Update preprocess.py
Browse files
- preprocess.py +4 -3
preprocess.py
CHANGED
|
@@ -19,12 +19,13 @@ def parse(csv_path):
|
|
| 19 |
data.to_csv('data/02 drop_duplicates.csv', index=False)
|
| 20 |
|
| 21 |
# Step 2: Define the feature columns (X) and target column (y)
|
| 22 |
-
X = data[["name", "attendance percentage", "average sleep time", "average screen time"]] # Feature columns
|
| 23 |
-
|
|
|
|
| 24 |
|
| 25 |
# Normalize numerical features
|
| 26 |
scaler = MinMaxScaler()
|
| 27 |
-
numerical_features = ['user_depth', 'age_level', 'city_development_index', 'var_1']
|
| 28 |
data[numerical_features] = scaler.fit_transform(data[numerical_features])
|
| 29 |
data.to_csv('data/03 normalize.csv', index=False)
|
| 30 |
|
|
|
|
| 19 |
data.to_csv('data/02 drop_duplicates.csv', index=False)
|
| 20 |
|
| 21 |
# Step 2: Define the feature columns (X) and target column (y)
|
| 22 |
+
# X = data[["name", "attendance percentage", "average sleep time", "average screen time"]] # Feature columns
|
| 23 |
+
X = data[["DateTime","product","campaign_id","webpage_id","product_category_1","product_category_2","user_group_id","gender","age_level","user_depth","city_development_index","var_1"]] # Feature columns
|
| 24 |
+
y = data["is_click"] # Target column
|
| 25 |
|
| 26 |
# Normalize numerical features
|
| 27 |
scaler = MinMaxScaler()
|
| 28 |
+
numerical_features = ['product','campaign_id','webpage_id','user_depth',"product_category_1","product_category_2","user_group_id", 'age_level',"user_depth", 'city_development_index', 'var_1']
|
| 29 |
data[numerical_features] = scaler.fit_transform(data[numerical_features])
|
| 30 |
data.to_csv('data/03 normalize.csv', index=False)
|
| 31 |
|