# Aryan: added all environment project (commit a244ac5)
import os
import sys
from src.exception import CustomException
from src.logger import logging
import pandas as pd
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
@dataclass
class DataIngestionConfig:
    """Filesystem locations written by the data-ingestion step.

    Each path defaults to a CSV file inside the ``artifacts`` directory, so
    ``DataIngestionConfig()`` needs no arguments. Any path may be overridden
    by keyword, e.g. ``DataIngestionConfig(raw_data_path="tmp/data.csv")``.
    """

    # CSV file that receives the training split.
    train_data_path: str = os.path.join('artifacts', 'train.csv')
    # CSV file that receives the test split.
    test_data_path: str = os.path.join('artifacts', 'test.csv')
    # CSV file that receives the unsplit copy of the source dataset.
    raw_data_path: str = os.path.join('artifacts', 'data.csv')
class DataIngestion:
    """Read the source dataset and write raw, train and test CSV files."""

    # Dataset location used when the caller does not pass ``source_path``.
    DEFAULT_SOURCE_PATH = '/Users/aryan/Documents/aryan/ml-projects/project-1/notebook/data/StudentsPerformance.csv'

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    @staticmethod
    def _ensure_parent_dir(file_path):
        """Create the directory that will contain *file_path* if it is missing."""
        parent = os.path.dirname(file_path)
        # A bare file name has an empty dirname, and os.makedirs('') raises.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def initiate_data_ingestion(self, source_path=None):
        """Load the dataset, save a raw copy, and save an 80/20 train/test split.

        Args:
            source_path: Path of the CSV file to read. When ``None``,
                ``DEFAULT_SOURCE_PATH`` is used.

        Returns:
            A ``(train_data_path, test_data_path)`` tuple taken from
            ``self.ingestion_config``.

        Raises:
            CustomException: Wraps any exception raised while reading,
                splitting or writing the data.
        """
        logging.info("Data Ingestion started")
        if source_path is None:
            source_path = self.DEFAULT_SOURCE_PATH
        config = self.ingestion_config
        try:
            df = pd.read_csv(source_path)
            logging.info("Dataset read as dataframe")

            self._ensure_parent_dir(config.raw_data_path)
            df.to_csv(config.raw_data_path, index=False)
            logging.info("Raw data saved")

            # Fixed random_state keeps the split reproducible across runs.
            train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

            # The split files may live outside the raw file's directory when
            # the configuration is customised, so create their parents too.
            self._ensure_parent_dir(config.train_data_path)
            train_set.to_csv(config.train_data_path, index=False)
            self._ensure_parent_dir(config.test_data_path)
            test_set.to_csv(config.test_data_path, index=False)
            logging.info("Train and Test data saved")

            return (
                config.train_data_path,
                config.test_data_path
            )
        except Exception as e:
            logging.error("Error in Data Ingestion {0}".format(e))
            # Chain explicitly so the original cause stays on the traceback.
            raise CustomException(e, sys) from e
# Script entry point: run one ingestion pass when the file is executed directly.
if __name__ == "__main__":
    DataIngestion().initiate_data_ingestion()