oscarleiva
/
mexico-pml-forecast


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
							import os 
import json
import numpy as np
import pandas as pd
import pickle
import datetime

import warnings
warnings.filterwarnings("ignore")


class PreProcessing():
    """Custom pre-processing that builds model features from a time series.

    The single entry point is :meth:`process`, which extends a
    datetime-indexed frame by one day and derives lag, weekday and month
    features from it.
    """

    def __init__(self):
        pass

    def process(self, df):
        """Append the next day to ``df`` and add lag/calendar features.

        Steps performed:

        1. 24 placeholder rows (value ``0``) are appended, one per hour of
           the calendar day following the latest timestamp in ``df.index``.
        2. For every lag ``i`` in 24..72 and 168..192 a column named
           ``'<col>h-<i>'`` is added holding the series shifted by ``i`` rows
           (``i`` hours, assuming the rows are hourly -- this is not checked).
        3. ``day_of_week`` and ``month`` name columns are added and then
           one-hot encoded by ``pd.get_dummies``.
        4. Rows containing any NaN (e.g. the first rows, which have no
           complete lag history) are dropped.

        Args:
            df: DataFrame whose index holds datetime-like values exposing
                ``.date()`` on their maximum. The column-name assignment
                only succeeds for a frame with exactly one column.

        Returns:
            A new DataFrame with the original column, the lag columns and
            the one-hot encoded weekday/month columns. ``df`` itself is not
            modified.

        Raises:
            ValueError: if ``df`` has no rows.
        """
        if len(df.index) == 0:
            raise ValueError("df must contain at least one row")

        column_name = df.columns[0]

        # Use timedelta arithmetic to find the next calendar day: building
        # the date with ``day + 1`` breaks on the last day of every month.
        next_date = df.index.max().date() + datetime.timedelta(days=1)
        day_to_forecast = datetime.datetime(next_date.year,
                                            next_date.month,
                                            next_date.day)
        date_arr = np.array([day_to_forecast + datetime.timedelta(hours=i)
                             for i in range(24)])

        # Placeholder rows for the day to forecast; the zeros are never NaN,
        # so these rows survive the final dropna().
        day_to_forecast_df = pd.DataFrame(np.zeros(24), date_arr,
                                          columns=[column_name])

        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        df = pd.concat([df, day_to_forecast_df])

        # Add lags: the previous 1-3 days and the same hours one week back.
        lags = list(range(24, 73)) + list(range(168, 193))
        df_shifted = pd.concat([df] + [df.shift(i) for i in lags], axis=1)
        df_shifted.columns = ([column_name]
                              + ['{}h-{}'.format(column_name, i) for i in lags])

        # Add calendar features as names so get_dummies one-hot encodes them.
        timestamps = pd.to_datetime(df_shifted.index)
        df_shifted['day_of_week'] = timestamps.day_name()
        df_shifted['month'] = timestamps.month_name()

        # One-hot encode and drop rows lacking a full lag history.
        return pd.get_dummies(df_shifted).dropna()