{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib notebook\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PPh-24Ph-25Ph-26Ph-27Ph-28Ph-29Ph-30Ph-31Ph-32...month_Decmonth_Febmonth_Janmonth_Julmonth_Junmonth_Marmonth_Maymonth_Novmonth_Octmonth_Sep
Fecha_hora
2016-08-09 00:00:0095.326454.846145.198244.712346.299542.942244.355146.344045.682642.6676...0000000000
2016-08-09 01:00:0082.853252.217954.846145.198244.712346.299542.942244.355146.344045.6826...0000000000
2016-08-09 02:00:0057.976461.919852.217954.846145.198244.712346.299542.942244.355146.3440...0000000000
2016-08-09 03:00:0067.640736.810561.919852.217954.846145.198244.712346.299542.942244.3551...0000000000
2016-08-09 04:00:0066.489234.827436.810561.919852.217954.846145.198244.712346.299542.9422...0000000000
\n", "

5 rows × 94 columns

\n", "
" ], "text/plain": [ " P Ph-24 Ph-25 Ph-26 Ph-27 Ph-28 \\\n", "Fecha_hora \n", "2016-08-09 00:00:00 95.3264 54.8461 45.1982 44.7123 46.2995 42.9422 \n", "2016-08-09 01:00:00 82.8532 52.2179 54.8461 45.1982 44.7123 46.2995 \n", "2016-08-09 02:00:00 57.9764 61.9198 52.2179 54.8461 45.1982 44.7123 \n", "2016-08-09 03:00:00 67.6407 36.8105 61.9198 52.2179 54.8461 45.1982 \n", "2016-08-09 04:00:00 66.4892 34.8274 36.8105 61.9198 52.2179 54.8461 \n", "\n", " Ph-29 Ph-30 Ph-31 Ph-32 ... month_Dec \\\n", "Fecha_hora ... \n", "2016-08-09 00:00:00 44.3551 46.3440 45.6826 42.6676 ... 0 \n", "2016-08-09 01:00:00 42.9422 44.3551 46.3440 45.6826 ... 0 \n", "2016-08-09 02:00:00 46.2995 42.9422 44.3551 46.3440 ... 0 \n", "2016-08-09 03:00:00 44.7123 46.2995 42.9422 44.3551 ... 0 \n", "2016-08-09 04:00:00 45.1982 44.7123 46.2995 42.9422 ... 0 \n", "\n", " month_Feb month_Jan month_Jul month_Jun month_Mar \\\n", "Fecha_hora \n", "2016-08-09 00:00:00 0 0 0 0 0 \n", "2016-08-09 01:00:00 0 0 0 0 0 \n", "2016-08-09 02:00:00 0 0 0 0 0 \n", "2016-08-09 03:00:00 0 0 0 0 0 \n", "2016-08-09 04:00:00 0 0 0 0 0 \n", "\n", " month_May month_Nov month_Oct month_Sep \n", "Fecha_hora \n", "2016-08-09 00:00:00 0 0 0 0 \n", "2016-08-09 01:00:00 0 0 0 0 \n", "2016-08-09 02:00:00 0 0 0 0 \n", "2016-08-09 03:00:00 0 0 0 0 \n", "2016-08-09 04:00:00 0 0 0 0 \n", "\n", "[5 rows x 94 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fname = \"lbrprices_transformed_v2.csv\"\n", "prices = pd.read_csv(fname, index_col=\"Fecha_hora\").dropna()\n", "prices.head(5)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PPh-24Ph-25Ph-26Ph-27Ph-28Ph-29Ph-30Ph-31Ph-32...month_Decmonth_Febmonth_Janmonth_Julmonth_Junmonth_Marmonth_Maymonth_Novmonth_Octmonth_Sep
count20520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.000000...20520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.00000020520.000000
mean72.09387372.05337172.05126272.04838872.04358372.03996972.03583172.03290472.02973872.025476...0.0853800.0654970.0725150.0725150.0701750.0725150.0725150.1052630.1087720.105263
std34.18361534.17671434.17703634.17685434.17354434.17270034.17091134.17053934.16997134.168179...0.2794530.2474070.2593440.2593440.2554490.2593440.2593440.3069000.3113600.306900
min18.70490018.70490018.70490018.70490018.70490018.70490018.70490018.70490018.70490018.704900...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%48.16440048.15647548.15417548.15225048.14730048.14537548.14050048.13460048.13250048.130500...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
50%65.08430065.01010065.00850065.00725065.00225064.99660064.99510064.99360064.98955064.985950...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
75%88.62092588.54180088.54180088.53145088.52645088.52142588.51782588.51672588.51367588.496425...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
max329.440000329.440000329.440000329.440000329.440000329.440000329.440000329.440000329.440000329.440000...1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n", "

8 rows × 94 columns

\n", "
" ], "text/plain": [ " P Ph-24 Ph-25 Ph-26 Ph-27 \\\n", "count 20520.000000 20520.000000 20520.000000 20520.000000 20520.000000 \n", "mean 72.093873 72.053371 72.051262 72.048388 72.043583 \n", "std 34.183615 34.176714 34.177036 34.176854 34.173544 \n", "min 18.704900 18.704900 18.704900 18.704900 18.704900 \n", "25% 48.164400 48.156475 48.154175 48.152250 48.147300 \n", "50% 65.084300 65.010100 65.008500 65.007250 65.002250 \n", "75% 88.620925 88.541800 88.541800 88.531450 88.526450 \n", "max 329.440000 329.440000 329.440000 329.440000 329.440000 \n", "\n", " Ph-28 Ph-29 Ph-30 Ph-31 Ph-32 \\\n", "count 20520.000000 20520.000000 20520.000000 20520.000000 20520.000000 \n", "mean 72.039969 72.035831 72.032904 72.029738 72.025476 \n", "std 34.172700 34.170911 34.170539 34.169971 34.168179 \n", "min 18.704900 18.704900 18.704900 18.704900 18.704900 \n", "25% 48.145375 48.140500 48.134600 48.132500 48.130500 \n", "50% 64.996600 64.995100 64.993600 64.989550 64.985950 \n", "75% 88.521425 88.517825 88.516725 88.513675 88.496425 \n", "max 329.440000 329.440000 329.440000 329.440000 329.440000 \n", "\n", " ... month_Dec month_Feb month_Jan month_Jul \\\n", "count ... 20520.000000 20520.000000 20520.000000 20520.000000 \n", "mean ... 0.085380 0.065497 0.072515 0.072515 \n", "std ... 0.279453 0.247407 0.259344 0.259344 \n", "min ... 0.000000 0.000000 0.000000 0.000000 \n", "25% ... 0.000000 0.000000 0.000000 0.000000 \n", "50% ... 0.000000 0.000000 0.000000 0.000000 \n", "75% ... 0.000000 0.000000 0.000000 0.000000 \n", "max ... 
1.000000 1.000000 1.000000 1.000000 \n", "\n", " month_Jun month_Mar month_May month_Nov month_Oct \\\n", "count 20520.000000 20520.000000 20520.000000 20520.000000 20520.000000 \n", "mean 0.070175 0.072515 0.072515 0.105263 0.108772 \n", "std 0.255449 0.259344 0.259344 0.306900 0.311360 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "75% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "max 1.000000 1.000000 1.000000 1.000000 1.000000 \n", "\n", " month_Sep \n", "count 20520.000000 \n", "mean 0.105263 \n", "std 0.306900 \n", "min 0.000000 \n", "25% 0.000000 \n", "50% 0.000000 \n", "75% 0.000000 \n", "max 1.000000 \n", "\n", "[8 rows x 94 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prices.describe(include=['object', 'int', 'float'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Choosing features and target" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "X = prices.drop([\"P\"], axis=1)\n", "y = prices[\"P\"]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Fecha_hora\n", "2016-08-09 00:00:00 95.3264\n", "2016-08-09 01:00:00 82.8532\n", "2016-08-09 02:00:00 57.9764\n", "2016-08-09 03:00:00 67.6407\n", "2016-08-09 04:00:00 66.4892\n", "Name: P, dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y.head(5)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Ph-24Ph-25Ph-26Ph-27Ph-28Ph-29Ph-30Ph-31Ph-32Ph-33...month_Decmonth_Febmonth_Janmonth_Julmonth_Junmonth_Marmonth_Maymonth_Novmonth_Octmonth_Sep
Fecha_hora
2016-08-09 00:00:0054.846145.198244.712346.299542.942244.355146.344045.682642.667639.0872...0000000000
2016-08-09 01:00:0052.217954.846145.198244.712346.299542.942244.355146.344045.682642.6676...0000000000
2016-08-09 02:00:0061.919852.217954.846145.198244.712346.299542.942244.355146.344045.6826...0000000000
2016-08-09 03:00:0036.810561.919852.217954.846145.198244.712346.299542.942244.355146.3440...0000000000
2016-08-09 04:00:0034.827436.810561.919852.217954.846145.198244.712346.299542.942244.3551...0000000000
\n", "

5 rows × 93 columns

\n", "
" ], "text/plain": [ " Ph-24 Ph-25 Ph-26 Ph-27 Ph-28 Ph-29 \\\n", "Fecha_hora \n", "2016-08-09 00:00:00 54.8461 45.1982 44.7123 46.2995 42.9422 44.3551 \n", "2016-08-09 01:00:00 52.2179 54.8461 45.1982 44.7123 46.2995 42.9422 \n", "2016-08-09 02:00:00 61.9198 52.2179 54.8461 45.1982 44.7123 46.2995 \n", "2016-08-09 03:00:00 36.8105 61.9198 52.2179 54.8461 45.1982 44.7123 \n", "2016-08-09 04:00:00 34.8274 36.8105 61.9198 52.2179 54.8461 45.1982 \n", "\n", " Ph-30 Ph-31 Ph-32 Ph-33 ... month_Dec \\\n", "Fecha_hora ... \n", "2016-08-09 00:00:00 46.3440 45.6826 42.6676 39.0872 ... 0 \n", "2016-08-09 01:00:00 44.3551 46.3440 45.6826 42.6676 ... 0 \n", "2016-08-09 02:00:00 42.9422 44.3551 46.3440 45.6826 ... 0 \n", "2016-08-09 03:00:00 46.2995 42.9422 44.3551 46.3440 ... 0 \n", "2016-08-09 04:00:00 44.7123 46.2995 42.9422 44.3551 ... 0 \n", "\n", " month_Feb month_Jan month_Jul month_Jun month_Mar \\\n", "Fecha_hora \n", "2016-08-09 00:00:00 0 0 0 0 0 \n", "2016-08-09 01:00:00 0 0 0 0 0 \n", "2016-08-09 02:00:00 0 0 0 0 0 \n", "2016-08-09 03:00:00 0 0 0 0 0 \n", "2016-08-09 04:00:00 0 0 0 0 0 \n", "\n", " month_May month_Nov month_Oct month_Sep \n", "Fecha_hora \n", "2016-08-09 00:00:00 0 0 0 0 \n", "2016-08-09 01:00:00 0 0 0 0 \n", "2016-08-09 02:00:00 0 0 0 0 \n", "2016-08-09 03:00:00 0 0 0 0 \n", "2016-08-09 04:00:00 0 0 0 0 \n", "\n", "[5 rows x 93 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train/test split" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training features/target: (15390, 93) (15390,)\n", "Testing freatures/targe: (5130, 93) (5130,)\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 1)\n", "print(\"Training features/target: \", 
X_train.shape, y_train.shape)\n", "print(\"Testing freatures/targe: \", X_test.shape, y_test.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Scaling Features" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "y_train = y_train.values.reshape(-1 ,1)\n", "y_test = y_test.values.reshape(-1 , 1)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Cross-Validation and Hyper-parameters | Decision Tree Regressor" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Import Pipeline and GridSearchCV\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.tree import DecisionTreeRegressor" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pipe = Pipeline([\n", " ('standarscaler', StandardScaler()),\n", " ('regressor', DecisionTreeRegressor(max_depth=8))\n", "])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 5 folds for each of 13 candidates, totalling 65 fits\n", "[CV] regressor__max_depth=3 ..........................................\n", "[CV] . regressor__max_depth=3, score=0.5169398553533737, total= 0.4s\n", "[CV] regressor__max_depth=3 ..........................................\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.4s remaining: 0.0s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[CV] . 
regressor__max_depth=3, score=0.5292431314738588, total= 0.4s\n", "[CV] regressor__max_depth=3 ..........................................\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.8s remaining: 0.0s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[CV] . regressor__max_depth=3, score=0.5271596622666745, total= 0.4s\n", "[CV] regressor__max_depth=3 ..........................................\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 1.3s remaining: 0.0s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[CV] . regressor__max_depth=3, score=0.5189516721105789, total= 0.4s\n", "[CV] regressor__max_depth=3 ..........................................\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 1.7s remaining: 0.0s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[CV] . regressor__max_depth=3, score=0.5328275859007272, total= 0.4s\n", "[CV] regressor__max_depth=4 ..........................................\n", "[CV] . regressor__max_depth=4, score=0.5569892983602978, total= 0.5s\n", "[CV] regressor__max_depth=4 ..........................................\n", "[CV] . regressor__max_depth=4, score=0.5718718945800592, total= 0.5s\n", "[CV] regressor__max_depth=4 ..........................................\n", "[CV] . regressor__max_depth=4, score=0.5705645799913583, total= 0.5s\n", "[CV] regressor__max_depth=4 ..........................................\n", "[CV] . regressor__max_depth=4, score=0.5683469295684258, total= 0.5s\n", "[CV] regressor__max_depth=4 ..........................................\n", "[CV] . regressor__max_depth=4, score=0.5634864084931571, total= 0.5s\n", "[CV] regressor__max_depth=5 ..........................................\n", "[CV] . 
regressor__max_depth=5, score=0.5755784603496508, total= 0.6s\n", "[CV] regressor__max_depth=5 ..........................................\n", "[CV] . regressor__max_depth=5, score=0.6070172485297564, total= 0.6s\n", "[CV] regressor__max_depth=5 ..........................................\n", "[CV] . regressor__max_depth=5, score=0.5976264926331821, total= 0.7s\n", "[CV] regressor__max_depth=5 ..........................................\n", "[CV] . regressor__max_depth=5, score=0.5802800801870673, total= 0.6s\n", "[CV] regressor__max_depth=5 ..........................................\n", "[CV] . regressor__max_depth=5, score=0.5909206393516135, total= 0.6s\n", "[CV] regressor__max_depth=6 ..........................................\n", "[CV] . regressor__max_depth=6, score=0.6059512783528294, total= 0.7s\n", "[CV] regressor__max_depth=6 ..........................................\n", "[CV] .. regressor__max_depth=6, score=0.628302461531919, total= 0.7s\n", "[CV] regressor__max_depth=6 ..........................................\n", "[CV] . regressor__max_depth=6, score=0.5939095662582892, total= 0.7s\n", "[CV] regressor__max_depth=6 ..........................................\n", "[CV] .. regressor__max_depth=6, score=0.602608087879215, total= 0.7s\n", "[CV] regressor__max_depth=6 ..........................................\n", "[CV] . regressor__max_depth=6, score=0.6212872388320132, total= 0.7s\n", "[CV] regressor__max_depth=7 ..........................................\n", "[CV] . regressor__max_depth=7, score=0.6137124228075161, total= 0.9s\n", "[CV] regressor__max_depth=7 ..........................................\n", "[CV] . regressor__max_depth=7, score=0.6151119363908413, total= 0.9s\n", "[CV] regressor__max_depth=7 ..........................................\n", "[CV] .... regressor__max_depth=7, score=0.6035273269369, total= 0.9s\n", "[CV] regressor__max_depth=7 ..........................................\n", "[CV] . 
regressor__max_depth=7, score=0.6169263342556857, total= 0.9s\n", "[CV] regressor__max_depth=7 ..........................................\n", "[CV] . regressor__max_depth=7, score=0.6312721893001614, total= 0.9s\n", "[CV] regressor__max_depth=8 ..........................................\n", "[CV] .. regressor__max_depth=8, score=0.616090697304017, total= 1.1s\n", "[CV] regressor__max_depth=8 ..........................................\n", "[CV] . regressor__max_depth=8, score=0.6085141359390052, total= 1.0s\n", "[CV] regressor__max_depth=8 ..........................................\n", "[CV] . regressor__max_depth=8, score=0.5804836936155511, total= 0.7s\n", "[CV] regressor__max_depth=8 ..........................................\n", "[CV] . regressor__max_depth=8, score=0.6298486549529034, total= 0.7s\n", "[CV] regressor__max_depth=8 ..........................................\n", "[CV] . regressor__max_depth=8, score=0.6288574935330786, total= 0.7s\n", "[CV] regressor__max_depth=9 ..........................................\n", "[CV] . regressor__max_depth=9, score=0.6070635264279656, total= 0.8s\n", "[CV] regressor__max_depth=9 ..........................................\n", "[CV] . regressor__max_depth=9, score=0.6079616981779048, total= 0.9s\n", "[CV] regressor__max_depth=9 ..........................................\n", "[CV] . regressor__max_depth=9, score=0.6060849679553317, total= 0.8s\n", "[CV] regressor__max_depth=9 ..........................................\n", "[CV] . regressor__max_depth=9, score=0.6342373366266129, total= 0.8s\n", "[CV] regressor__max_depth=9 ..........................................\n", "[CV] . regressor__max_depth=9, score=0.6195804618417622, total= 1.0s\n", "[CV] regressor__max_depth=10 .........................................\n", "[CV] regressor__max_depth=10, score=0.5919827391435091, total= 1.0s\n", "[CV] regressor__max_depth=10 .........................................\n", "[CV] . 
regressor__max_depth=10, score=0.591163529137293, total= 0.9s\n", "[CV] regressor__max_depth=10 .........................................\n", "[CV] regressor__max_depth=10, score=0.6058360615686575, total= 0.9s\n", "[CV] regressor__max_depth=10 .........................................\n", "[CV] regressor__max_depth=10, score=0.6424703429332514, total= 0.9s\n", "[CV] regressor__max_depth=10 .........................................\n", "[CV] regressor__max_depth=10, score=0.6398478330529145, total= 0.9s\n", "[CV] regressor__max_depth=11 .........................................\n", "[CV] regressor__max_depth=11, score=0.5701732792622907, total= 1.0s\n", "[CV] regressor__max_depth=11 .........................................\n", "[CV] regressor__max_depth=11, score=0.5661969680976917, total= 1.3s\n", "[CV] regressor__max_depth=11 .........................................\n", "[CV] regressor__max_depth=11, score=0.5844198200947085, total= 1.1s\n", "[CV] regressor__max_depth=11 .........................................\n", "[CV] regressor__max_depth=11, score=0.6473912426875859, total= 0.9s\n", "[CV] regressor__max_depth=11 .........................................\n", "[CV] regressor__max_depth=11, score=0.6478204733367422, total= 1.0s\n", "[CV] regressor__max_depth=12 .........................................\n", "[CV] . regressor__max_depth=12, score=0.564342995041403, total= 1.0s\n", "[CV] regressor__max_depth=12 .........................................\n", "[CV] .. 
regressor__max_depth=12, score=0.56464709815865, total= 1.0s\n", "[CV] regressor__max_depth=12 .........................................\n", "[CV] regressor__max_depth=12, score=0.5691740864798083, total= 1.1s\n", "[CV] regressor__max_depth=12 .........................................\n", "[CV] regressor__max_depth=12, score=0.6392364315887575, total= 1.1s\n", "[CV] regressor__max_depth=12 .........................................\n", "[CV] regressor__max_depth=12, score=0.6384692454062504, total= 1.0s\n", "[CV] regressor__max_depth=13 .........................................\n", "[CV] regressor__max_depth=13, score=0.5523438620310935, total= 1.1s\n", "[CV] regressor__max_depth=13 .........................................\n", "[CV] regressor__max_depth=13, score=0.5430819073602634, total= 1.1s\n", "[CV] regressor__max_depth=13 .........................................\n", "[CV] regressor__max_depth=13, score=0.5327778363292415, total= 1.1s\n", "[CV] regressor__max_depth=13 .........................................\n", "[CV] regressor__max_depth=13, score=0.6079231207296709, total= 1.1s\n", "[CV] regressor__max_depth=13 .........................................\n", "[CV] regressor__max_depth=13, score=0.6267399814895201, total= 1.1s\n", "[CV] regressor__max_depth=14 .........................................\n", "[CV] regressor__max_depth=14, score=0.5375847241312031, total= 1.1s\n", "[CV] regressor__max_depth=14 .........................................\n", "[CV] regressor__max_depth=14, score=0.5090921616961444, total= 1.2s\n", "[CV] regressor__max_depth=14 .........................................\n", "[CV] regressor__max_depth=14, score=0.5328989388967353, total= 2.0s\n", "[CV] regressor__max_depth=14 .........................................\n", "[CV] regressor__max_depth=14, score=0.6161752692167073, total= 2.3s\n", "[CV] regressor__max_depth=14 .........................................\n", "[CV] . 
regressor__max_depth=14, score=0.602342823407813, total= 1.7s\n", "[CV] regressor__max_depth=15 .........................................\n", "[CV] regressor__max_depth=15, score=0.5270248181929342, total= 1.7s\n", "[CV] regressor__max_depth=15 .........................................\n", "[CV] regressor__max_depth=15, score=0.5106190857782906, total= 1.3s\n", "[CV] regressor__max_depth=15 .........................................\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[CV] regressor__max_depth=15, score=0.5544623006601073, total= 1.2s\n", "[CV] regressor__max_depth=15 .........................................\n", "[CV] regressor__max_depth=15, score=0.5787527037563012, total= 1.2s\n", "[CV] regressor__max_depth=15 .........................................\n", "[CV] . regressor__max_depth=15, score=0.591278105782729, total= 1.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=1)]: Done 65 out of 65 | elapsed: 1.0min finished\n" ] }, { "data": { "text/plain": [ "0.65082387928641783" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hyperparameters = {'regressor__max_depth': range(3,16)}\n", "\n", "rgs = GridSearchCV(pipe, hyperparameters, cv=5, scoring='r2', verbose=5)\n", "rgs.fit(X_train, y_train)\n", "rgs.score(X_test, y_test)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.65082387928641783" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rgs.score(X_test, y_test)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'regressor__max_depth': 7}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rgs.best_params_" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "y_pred = rgs.predict(X_test)" ] 
}, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "application/javascript": [ "/* Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not have WebSocket support.' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", " 'Firefox 4 and 5 are also supported but you ' +\n", " 'have to enable WebSockets in about:config.');\n", " };\n", "}\n", "\n", "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", " this.id = figure_id;\n", "\n", " this.ws = websocket;\n", "\n", " this.supports_binary = (this.ws.binaryType != undefined);\n", "\n", " if (!this.supports_binary) {\n", " var warnings = document.getElementById(\"mpl-warnings\");\n", " if (warnings) {\n", " warnings.style.display = 'block';\n", " warnings.textContent = (\n", " \"This browser does not support binary websocket messages. \" +\n", " \"Performance may be slow.\");\n", " }\n", " }\n", "\n", " this.imageObj = new Image();\n", "\n", " this.context = undefined;\n", " this.message = undefined;\n", " this.canvas = undefined;\n", " this.rubberband_canvas = undefined;\n", " this.rubberband_context = undefined;\n", " this.format_dropdown = undefined;\n", "\n", " this.image_mode = 'full';\n", "\n", " this.root = $('
');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " $(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " if (mpl.ratio != 1) {\n", " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", " }\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " fig.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var titlebar = $(\n", " '
');\n", " var titletext = $(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = $('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = $('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var backingStore = this.context.backingStorePixelRatio ||\n", "\tthis.context.webkitBackingStorePixelRatio ||\n", "\tthis.context.mozBackingStorePixelRatio ||\n", "\tthis.context.msBackingStorePixelRatio ||\n", "\tthis.context.oBackingStorePixelRatio ||\n", "\tthis.context.backingStorePixelRatio || 1;\n", "\n", " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", "\n", " var rubberband = $('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " 
rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width * mpl.ratio);\n", " canvas.attr('height', height * mpl.ratio);\n", " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = $('
')\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = $('