import datetime
import json
import os
import pickle
import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")


class PreProcessing():
    """Custom pre-processing that turns an hourly series into a lagged,
    one-hot encoded feature table covering the next day to forecast.
    """

    # Number of placeholder rows appended after the last observation.
    FORECAST_HOURS = 24
    # Positional lag offsets (in rows) added as feature columns.
    DAILY_LAGS = range(24, 73)
    WEEKLY_LAGS = range(168, 193)

    def process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add lag, day-of-week and month features to ``df``.

        The frame is extended with 24 zero-valued placeholder rows for the
        calendar day following the latest timestamp in the index, then lagged
        copies of the value column are added, followed by one-hot encoded
        day-of-week and month-name columns.

        Args:
            df: Frame whose first (and only) column holds the values and whose
                index supports ``max().normalize()`` (i.e. a ``DatetimeIndex``).
                Lags are positional row shifts, so the labels ``h-<n>`` are
                only accurate if rows are one hour apart -- the code does not
                check this. The input frame is not modified.

        Returns:
            A new frame with the value column, one ``<column>h-<lag>`` column
            per lag, and dummy columns prefixed ``day_of_week_`` / ``month_``.
            Rows containing any NaN (the first 192 rows, which lack a full lag
            history) are dropped.
        """
        column_name = df.columns[0]

        # Midnight of the day after the last observation. Using timestamp
        # arithmetic instead of ``day + 1`` keeps this valid across month and
        # year boundaries (e.g. Jan 31 -> Feb 1).
        day_to_forecast = df.index.max().normalize() + pd.Timedelta(days=1)
        forecast_index = day_to_forecast + pd.to_timedelta(
            np.arange(self.FORECAST_HOURS), unit="h"
        )
        day_to_forecast_df = pd.DataFrame(
            np.zeros(self.FORECAST_HOURS),
            index=forecast_index,
            columns=[column_name],
        )
        # ``DataFrame.append`` was removed in pandas 2.0; ``concat`` is the
        # supported equivalent and also returns a new frame.
        extended = pd.concat([df, day_to_forecast_df])

        # Add lags: one shifted copy of the frame per lag offset.
        lags = [*self.DAILY_LAGS, *self.WEEKLY_LAGS]
        features = pd.concat(
            [extended] + [extended.shift(lag) for lag in lags], axis=1
        )
        features.columns = [column_name] + [
            '{}h-{}'.format(column_name, lag) for lag in lags
        ]

        # Add calendar columns (full English day and month names).
        timestamps = pd.to_datetime(features.index)
        features['day_of_week'] = timestamps.day_name()
        features['month'] = timestamps.month_name()

        # One-hot encode the string columns, then drop rows that lack a
        # complete lag history.
        return pd.get_dummies(features).dropna()