Spaces:
Build error
Build error
| import os | |
| import sys | |
| from dataclasses import dataclass | |
| from sklearn.metrics import r2_score | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.neighbors import KNeighborsRegressor | |
| from sklearn.tree import DecisionTreeRegressor | |
| from sklearn.ensemble import ( | |
| RandomForestRegressor, | |
| AdaBoostRegressor, | |
| GradientBoostingRegressor, | |
| ) | |
| from xgboost import XGBRegressor | |
| from catboost import CatBoostRegressor | |
| from src.logger import logging | |
| from src.exception import CustomException | |
| from src.utils import save_object, evaluate_models | |
@dataclass
class ModelTrainerConfig:
    """Settings for the model-training step.

    Attributes:
        trained_model_file_path: Relative path the selected model is written
            to. Defaults to ``model.pkl`` inside the ``artifacts`` directory.
    """

    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")
class ModelTrainer:
    """Fit a set of candidate regressors and persist the best-scoring one."""

    # Lowest score reported by ``evaluate_models`` that is still accepted.
    MIN_ACCEPTABLE_SCORE = 0.6

    def __init__(self) -> None:
        self.model_trainer_config = ModelTrainerConfig()

    @staticmethod
    def _candidate_models() -> dict:
        """Return freshly constructed estimators keyed by display name."""
        return {
            "Linear Regression": LinearRegression(),
            "K-Neighbors Regressor": KNeighborsRegressor(),
            "Decision Tree Regressor": DecisionTreeRegressor(),
            "Random Forest Regressor": RandomForestRegressor(),
            "AdaBoost Regressor": AdaBoostRegressor(),
            "Gradient Boosting Regressor": GradientBoostingRegressor(),
            "XGBRegressor": XGBRegressor(),
            "CatBoosting Regressor": CatBoostRegressor(verbose=False),
        }

    @staticmethod
    def _param_grids() -> dict:
        """Return the hyperparameter grid for each candidate model.

        Keys mirror those of ``_candidate_models``. An empty grid means no
        hyperparameters are searched for that model. Only the parameters
        listed here are searched; everything else is left as constructed.
        """
        return {
            "Linear Regression": {},
            "K-Neighbors Regressor": {},
            "Decision Tree Regressor": {
                "criterion": [
                    "squared_error",
                    "friedman_mse",
                    "absolute_error",
                    "poisson",
                ],
            },
            "Random Forest Regressor": {
                "n_estimators": [8, 16, 32, 64, 128, 256],
            },
            "AdaBoost Regressor": {
                "learning_rate": [0.1, 0.01, 0.5, 0.001],
                "n_estimators": [8, 16, 32, 64, 128, 256],
            },
            "Gradient Boosting Regressor": {
                "learning_rate": [0.1, 0.01, 0.05, 0.001],
                "subsample": [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                "n_estimators": [8, 16, 32, 64, 128, 256],
            },
            "XGBRegressor": {
                "learning_rate": [0.1, 0.01, 0.05, 0.001],
                "n_estimators": [8, 16, 32, 64, 128, 256],
            },
            "CatBoosting Regressor": {
                "depth": [6, 8, 10],
                "learning_rate": [0.01, 0.05, 0.1],
                "iterations": [30, 50, 100],
            },
        }

    def initiate_model_trainer(self, train_array, test_array) -> float:
        """Select, save and score the best regressor.

        Args:
            train_array: 2-D array whose last column is the target and whose
                remaining columns are the features.
            test_array: 2-D array with the same column layout as
                ``train_array``.

        Returns:
            The R^2 score of the selected model's predictions on the test
            features.

        Raises:
            CustomException: If no model reaches ``MIN_ACCEPTABLE_SCORE`` (or
                the evaluation report is empty), or if any step fails.
        """
        try:
            logging.info("Split training and testing input data")
            X_train, y_train = train_array[:, :-1], train_array[:, -1]
            X_test, y_test = test_array[:, :-1], test_array[:, -1]

            models = self._candidate_models()

            # presumably maps model name -> score on the test split;
            # confirm against src.utils.evaluate_models
            model_report: dict = evaluate_models(
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                models=models,
                params_grid=self._param_grids(),
            )

            # An empty report would otherwise surface as an opaque
            # ValueError from max().
            if not model_report:
                raise CustomException("No best model found", sys)

            # max() keeps the first key on ties, matching dict order.
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]

            if best_model_score < self.MIN_ACCEPTABLE_SCORE:
                raise CustomException("No best model found", sys)

            # NOTE(review): relies on evaluate_models having fitted the
            # instances in `models` in place -- confirm.
            best_model = models[best_model_name]
            logging.info(
                "Best found model on both training and testing dataset: "
                f"{best_model_name} (score={best_model_score})"
            )

            save_object(
                file_path=self.model_trainer_config.trained_model_file_path,
                obj=best_model,
            )
            print(best_model_name)

            predicted = best_model.predict(X_test)
            return r2_score(y_test, predicted)
        except CustomException:
            # Already a domain error; re-wrapping would bury its message.
            raise
        except Exception as e:
            raise CustomException(e, sys) from e