utils.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. import os
  2. import json
  3. import numpy as np
  4. import pandas as pd
  5. import pickle
  6. import datetime
  7. import warnings
  8. warnings.filterwarnings("ignore")
  9. class PreProcessing():
  10. """Custom Pre-Processing for our use-case
  11. """
  12. def __init__(self):
  13. pass
  14. def process(self, df):
  15. ''' Adds lags and day_of_week and month columns to dataframe'''
  16. index_name = df.index.name
  17. column_name = df.columns[0]
  18. df.index.max().date().month
  19. day_to_forecast = datetime.datetime(df.index.max().date().year,
  20. df.index.max().date().month,
  21. df.index.max().date().day + 1)
  22. date_arr = np.array([day_to_forecast + datetime.timedelta(hours=i) for i in range(24)])
  23. day_to_forecast_df = pd.DataFrame(np.zeros(24), date_arr, columns=[column_name])
  24. df = df.append(day_to_forecast_df)
  25. # Add Lags
  26. df_shifted = pd.concat([df]+[df[0:].shift(i) for i in range(24,73)]+[df[0:].shift(i) for i in range(168,193)], axis=1)
  27. df_shifted.columns = [column_name]+['{}h-{}'.format(column_name, i) for i in range(24,73)] + ['{}h-{}'.format(column_name, i) for i in range(168,193)]
  28. # Add day_of_week column
  29. df_shifted['day_of_week'] = pd.to_datetime(df_shifted.index)
  30. df_shifted['day_of_week'] = df_shifted.day_of_week.dt.day_name()
  31. # Add month abbreviature column
  32. df_shifted['month'] = pd.to_datetime(df_shifted.index).month_name()
  33. # Add one-hot Encoding
  34. df = pd.get_dummies(df_shifted).dropna()
  35. return df