# haileyhalimj@gmail.com
# Recover and restore preprocessing improvements from d54de4e
# 8504f5a
import pandas as pd
import src.preprocess.extract as ex
def get_product_list(start_date=None):
    """
    Return the unique material numbers found in the orders data.

    The unique values are printed to stdout before being returned.

    Args:
        start_date: passed through unchanged as the ``start_date`` keyword
            of ``ex.read_orders_data``. Defaults to None.

    Returns:
        The result of ``.unique()`` on the "Material Number" column.
    """
    orders = ex.read_orders_data(start_date=start_date)
    materials = orders["Material Number"].unique()
    print(materials)
    return materials
def get_employee_list():
    """
    Return the unique "Employee_Type" values from the employee data.

    The data comes from ``ex.read_employee_data``; "Description" is selected
    from it first, and "Employee_Type" is then selected from that result.

    Returns:
        The result of ``.unique()`` on the selected "Employee_Type" values.
    """
    # NOTE(review): the lookup is chained ("Description" first, then
    # "Employee_Type" on what that returns) - confirm this matches the
    # layout of the object read_employee_data produces.
    description = ex.read_employee_data()["Description"]
    return description["Employee_Type"].unique()
def get_released_product_list(start_date=None):
    """
    Return the unique material numbers of the orders data as a plain list.

    A one-line summary with the number of products is printed to stdout.
    NOTE(review): the previous docstring named COOIS_Released_Prod_Orders.csv
    as the data source; the file is chosen inside ``ex.read_orders_data``,
    so confirm there.

    Args:
        start_date: passed through unchanged as the ``start_date`` keyword
            of ``ex.read_orders_data``. Defaults to None.

    Returns:
        list: unique values of the "Material Number" column.
    """
    orders = ex.read_orders_data(start_date=start_date)
    unique_materials = orders["Material Number"].unique()
    # .tolist() (rather than list()) converts numpy scalars to Python ones.
    product_list = unique_materials.tolist()
    print(f"Released products for date range {start_date}: {len(product_list)} products")
    return product_list
def get_available_dates():
    """
    Collect the distinct calendar dates present in the orders data.

    Both date columns are converted with ``pd.to_datetime`` in place on the
    frame returned by ``ex.read_orders_data`` before the dates are taken.

    Returns:
        tuple: ``(all_dates, start_dates, end_dates)`` where
            start_dates - sorted unique dates of "Basic start date",
            end_dates   - sorted unique dates of "Basic finish date",
            all_dates   - sorted union of the two lists.
    """
    orders = ex.read_orders_data()
    start_col, finish_col = "Basic start date", "Basic finish date"
    for column in (start_col, finish_col):
        orders[column] = pd.to_datetime(orders[column])

    start_dates = sorted(orders[start_col].dt.date.unique())
    end_dates = sorted(orders[finish_col].dt.date.unique())
    # Merge both lists, dropping dates that occur as start and as finish.
    all_dates = sorted({*start_dates, *end_dates})
    return all_dates, start_dates, end_dates
def get_date_ranges():
    """
    List the distinct (start_date, end_date) pairs found in the orders data.

    Both date columns are converted with ``pd.to_datetime`` in place on the
    frame returned by ``ex.read_orders_data``; the pairs are then reduced to
    their calendar dates.

    Returns:
        list: sorted, de-duplicated ``(start_date, end_date)`` tuples taken
            from "Basic start date" and "Basic finish date".
    """
    orders = ex.read_orders_data()
    for column in ("Basic start date", "Basic finish date"):
        orders[column] = pd.to_datetime(orders[column])

    pairs = orders[["Basic start date", "Basic finish date"]].drop_duplicates()
    starts = pairs["Basic start date"].dt.date
    ends = pairs["Basic finish date"].dt.date
    # Timestamps differing only in time of day collapse to the same date
    # pair, so de-duplicate again after dropping the time component.
    return sorted(set(zip(starts, ends)))