| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import os
- import json
- import numpy as np
- import pandas as pd
- import pickle
- import datetime
- import warnings
- warnings.filterwarnings("ignore")
class PreProcessing():
    """Custom pre-processing that turns an hourly series into a lagged feature table.

    The frame passed to :meth:`process` is extended by one forecast day and
    then decorated with row-shifted lag columns and one-hot calendar features.
    """

    # Row offsets used for the lag columns: 24..72 and 168..192 rows back.
    # With one row per hour these are 1-3 days and 7-8 days of history
    # (assumes the caller supplies hourly, gap-free data -- shifts are by
    # row position, not by timestamp).
    _LAGS = tuple(range(24, 73)) + tuple(range(168, 193))

    def __init__(self):
        pass

    def process(self, df):
        """Add lag, day-of-week and month features for the next day to forecast.

        Args:
            df: DataFrame indexed by timestamps whose first column holds the
                series. The code assumes a single column (the lag-column
                renaming fails otherwise) and rows sorted by time.

        Returns:
            A new DataFrame containing the original column, one
            ``<column>h-<lag>`` column per lag, and one-hot encoded
            ``day_of_week_*`` / ``month_*`` columns. 24 zero-valued rows for
            the day after the last timestamp are appended before the lags are
            built, and every row with a missing lag is dropped.

        Raises:
            ValueError: If ``df`` has no rows.
        """
        if len(df.index) == 0:
            raise ValueError("df must contain at least one row")

        column_name = df.columns[0]

        # Step to the next calendar day with a timedelta; building the date
        # from `day + 1` breaks on the last day of any month.
        next_day = df.index.max().date() + datetime.timedelta(days=1)
        day_to_forecast = datetime.datetime(next_day.year, next_day.month, next_day.day)
        forecast_hours = [day_to_forecast + datetime.timedelta(hours=hour)
                          for hour in range(24)]

        # Placeholder rows (value 0) for the 24 hours that will be forecast.
        day_to_forecast_df = pd.DataFrame(np.zeros(24),
                                          index=forecast_hours,
                                          columns=[column_name])

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        df = pd.concat([df, day_to_forecast_df])

        # Add lags
        df_shifted = pd.concat([df] + [df.shift(lag) for lag in self._LAGS], axis=1)
        df_shifted.columns = [column_name] + [
            '{}h-{}'.format(column_name, lag) for lag in self._LAGS
        ]

        # Add day_of_week and month name columns
        timestamps = pd.to_datetime(df_shifted.index)
        df_shifted['day_of_week'] = timestamps.day_name()
        df_shifted['month'] = timestamps.month_name()

        # One-hot encode the calendar columns, then drop the leading rows
        # that do not have a full lag history.
        return pd.get_dummies(df_shifted).dropna()
|