KaiquanMah committed on
Commit
176b18e
·
verified ·
1 Parent(s): 11f2d7f

Update preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +4 -3
preprocess.py CHANGED
@@ -19,12 +19,13 @@ def parse(csv_path):
19
  data.to_csv('data/02 drop_duplicates.csv', index=False)
20
 
21
  # Step 2: Define the feature columns (X) and target column (y)
22
- X = data[["name", "attendance percentage", "average sleep time", "average screen time"]] # Feature columns
23
- y = data["grade"] # Target column
 
24
 
25
  # Normalize numerical features
26
  scaler = MinMaxScaler()
27
- numerical_features = ['user_depth', 'age_level', 'city_development_index', 'var_1']
28
  data[numerical_features] = scaler.fit_transform(data[numerical_features])
29
  data.to_csv('data/03 normalize.csv', index=False)
30
 
 
19
  data.to_csv('data/02 drop_duplicates.csv', index=False)
20
 
21
  # Step 2: Define the feature columns (X) and target column (y)
22
+ # X = data[["name", "attendance percentage", "average sleep time", "average screen time"]] # Feature columns
23
+ X = data[["DateTime","product","campaign_id","webpage_id","product_category_1","product_category_2","user_group_id","gender","age_level","user_depth","city_development_index","var_1"]] # Feature columns
24
+ y = data["is_click"] # Target column
25
 
26
  # Normalize numerical features
27
  scaler = MinMaxScaler()
28
+ numerical_features = ['product','campaign_id','webpage_id','user_depth',"product_category_1","product_category_2","user_group_id", 'age_level',"user_depth", 'city_development_index', 'var_1']
29
  data[numerical_features] = scaler.fit_transform(data[numerical_features])
30
  data.to_csv('data/03 normalize.csv', index=False)
31