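"""Model trainer component: compares several candidate regressors (tuned through the
project's shared evaluate_models utility) and persists the best-performing one."""
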
import os
import sys
from dataclasses import dataclass

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import r2_score
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

from src.exception import CustomException
from src.logger import logging
from src.utils import save_object, evaluate_models


@dataclass
class ModelTrainerConfig:
    """Configuration for the model trainer: path where the best model is saved."""
    trained_model_file_path: str = os.path.join('artifacts', 'model.pkl')


class ModelTrainer:
    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

    def initiate_model_trainer(self, train_array, test_array):
        """Train the candidate regressors, select the best one by R2 score, and save it."""
        try:
            logging.info("Model Trainer initiated")
            # The target is the last column of the transformed train/test arrays.
            X_train, y_train = train_array[:, :-1], train_array[:, -1]
            X_test, y_test = test_array[:, :-1], test_array[:, -1]

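            # Candidate regressors to compare; hyperparameter grids for most of
            # them are defined in `params` below.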
            models = {
                "Linear Regression": LinearRegression(),
                "Ridge": Ridge(),
                "Lasso": Lasso(),
                "Decision Tree": DecisionTreeRegressor(),
                "Random Forest": RandomForestRegressor(),
                "Gradient Boosting": GradientBoostingRegressor(),
                "AdaBoost": AdaBoostRegressor(),
                "SVR": SVR(),
                "KNeighbors": KNeighborsRegressor(),
                "CatBoost": CatBoostRegressor(verbose=False),
                "XGBRegressor": XGBRegressor()
            }

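            # Hyperparameter grids passed to evaluate_models; models without an
            # entry here (e.g. Linear Regression) presumably fall back to their defaults.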
            params = {
                "Decision Tree": {
                    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                    'splitter': ['best', 'random'],
                    'max_depth': [3, 5, 10, 15, 20, None]
                },
                "Random Forest": {
                    'n_estimators': [50, 100, 200],
                    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                    'max_depth': [3, 5, 10, 15, 20, None]
                },
                "Gradient Boosting": {
                    'learning_rate': [0.01, 0.1, 0.2, 0.3],
                    'n_estimators': [50, 100, 200],
                    'subsample': [0.6, 0.7, 0.8, 0.9, 1.0]
                },
                "SVR": {
                    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                    'C': [0.1, 1, 10, 100],
                    'gamma': ['scale', 'auto']
                },
                "KNeighbors": {
                    'n_neighbors': [3, 5, 7, 9],
                    'weights': ['uniform', 'distance'],
                    'metric': ['euclidean', 'manhattan']
                },
                "CatBoost": {
                    'depth': [4, 6, 8],
                    'learning_rate': [0.01, 0.1, 0.2],
                    'iterations': [100, 200, 300]
                },
                "XGBRegressor": {
                    'learning_rate': [0.01, 0.1, 0.2],
                    'n_estimators': [100, 200, 300],
                    'max_depth': [3, 5, 7]
                }
            }

logging.info("Models defined for training") |
|
|
model_report = evaluate_models(X_train, y_train, X_test, y_test, models , params) |
|
|
|
|
|
|
|
|
logging.info("Model evaluation completed finding best model") |
|
|
best_model_name = max(model_report, key=model_report.get) |
|
|
best_model = models[best_model_name] |
|
|
best_r2_score = model_report[best_model_name] |
|
|
|
|
|
            if best_r2_score < 0.6:
                logging.info("No model found with an R2 score of at least 0.6")
                raise CustomException("No best model found with an R2 score of at least 0.6", sys)

logging.info(f"Best Model: {best_model_name} with R2 Score: {best_r2_score}") |
|
|
|
|
|
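            # Persist the winning model so downstream code (e.g. a prediction
            # pipeline) can load it from the artifacts directory.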
            save_object(
                file_path=self.model_trainer_config.trained_model_file_path,
                obj=best_model
            )

            return best_r2_score

        except Exception as e:
            logging.error("Error in Model Trainer: {0}".format(e))
            raise CustomException(e, sys)
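

# Minimal usage sketch (assumption: the project wires this trainer to data
# ingestion/transformation components that produce the train/test arrays;
# the component names below are illustrative, not confirmed by this module):
#
#     if __name__ == "__main__":
#         from src.components.data_ingestion import DataIngestion
#         from src.components.data_transformation import DataTransformation
#
#         train_path, test_path = DataIngestion().initiate_data_ingestion()
#         train_arr, test_arr, _ = DataTransformation().initiate_data_transformation(train_path, test_path)
#         print(ModelTrainer().initiate_model_trainer(train_arr, test_arr))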