{ "cells": [ { "cell_type": "markdown", "id": "4d2a8b6c", "metadata": {}, "source": [ "#### Database" ] },
{ "cell_type": "code", "execution_count": 247, "id": "7be9eeff", "metadata": {}, "outputs": [], "source": [ "import os, sys\n", "\n", "# Root of the Django project. Set LEAGUES_PROJECT_PATH in the environment to run the\n", "# notebook elsewhere; the fallback is the path this notebook was written against.\n", "PROJECT_PATH = os.environ.get('LEAGUES_PROJECT_PATH', '/home/md/Work/ligalytics/leagues_stable/')\n", "sys.path.insert(0, PROJECT_PATH)\n", "os.environ.setdefault(\"DJANGO_SETTINGS_MODULE\", \"leagues.settings\")\n", "\n", "# Point the default database at the SQLite file inside the project directory.\n", "from leagues import settings\n", "settings.DATABASES['default']['NAME'] = os.path.join(PROJECT_PATH, 'db.sqlite3')\n", "\n", "import django\n", "django.setup()\n", "\n", "# Season and Game are not imported by name in this notebook; they are expected to\n", "# come from this star import.\n", "from scheduler.models import *\n", "from common.functions import distanceInKmByGPS\n", "import pandas as pd\n", "import numpy as np\n", "from django.db.models import Count, F, Value\n", "\n", "season = Season.objects.filter(nicename=\"Imported: Benchmark Season\").first()\n", "if season is None:\n", "    # .first() gives None when no season matches; stop here rather than passing\n", "    # season=None into the Game filter below.\n", "    raise LookupError(\"Season 'Imported: Benchmark Season' not found\")\n", "\n", "# Games of that season as dict rows, each extended with the home and away team's\n", "# short name, coordinates and attractivity. The next cell builds df from this.\n", "games = Game.objects.filter(season=season).annotate(\n", " home=F('homeTeam__shortname'),\n", " away=F('awayTeam__shortname'),\n", " home_lat=F('homeTeam__latitude'),\n", " home_lon=F('homeTeam__longitude'),\n", " home_attr=F('homeTeam__attractivity'),\n", " away_lat=F('awayTeam__latitude'),\n", " away_lon=F('awayTeam__longitude'),\n", " away_attr=F('awayTeam__attractivity')\n", ").values()\n", "\n" ] },
{ "cell_type": "markdown", "id": "bc191792", "metadata": {}, "source": [ "#### Dataframe" ] },
{ "cell_type": "code", "execution_count": 248, "id": "1e404cf8", "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import OneHotEncoder\n", "\n", "# create dataset\n", "df = pd.DataFrame.from_records(games.values())\n", "\n", "# pivots\n", "pivot_homeTeam_mean = df.pivot_table('attendance','homeTeam_id',aggfunc='mean')\n", "pivot_homeTeam_max = df.pivot_table('attendance','homeTeam_id',aggfunc='max')\n", "\n", "# add more features\n", "df['weekday'] = df.apply(lambda r: r['date'].weekday(), axis=1)\n", "df['day'] = 
df.apply(lambda r: r['date'].day, axis=1)\n", "df['month'] = df.apply(lambda r: r['date'].month, axis=1)\n", "df['year'] = df.apply(lambda r: r['date'].year, axis=1)\n", "df['distance'] = df.apply(lambda r: distanceInKmByGPS(r['home_lon'],r['home_lat'],r['away_lon'],r['away_lat']), axis=1)\n", "# weekday() counts Monday as 0 and Sunday as 6, so Saturday/Sunday are 5 and 6\n", "df['weekend'] = df.apply(lambda r: int(r['weekday'] in [5,6]), axis=1)\n", "df['winter_season'] = df.apply(lambda r: int(r['month'] in [1,2,3,10,11,12]), axis=1)\n", "# NOTE(review): home_base and stadium_size are the mean / max of the label column\n", "# (attendance) per home team over all rows, i.e. they are computed before any\n", "# train/test split and carry label information into the features.\n", "df['home_base'] = df.apply(lambda r: pivot_homeTeam_mean.loc[r['homeTeam_id'],'attendance'], axis=1)\n", "df['stadium_size'] = df.apply(lambda r: pivot_homeTeam_max.loc[r['homeTeam_id'],'attendance'], axis=1)\n", "\n", "# one hot encoding: every category value of a field becomes its own 0/1 column,\n", "# named after the category value itself\n", "ohe_fields = ['time', 'historic_season']\n", "\n", "for field in ohe_fields:\n", " ohe = OneHotEncoder()\n", " transformed = ohe.fit_transform(df[[field]])\n", " df[ohe.categories_[0]] = transformed.toarray()\n", "\n", "# sort label to last index (later cells take df.columns[:-1] as candidate features)\n", "cols = list(df.columns)\n", "cols.append(cols.pop(cols.index('attendance')))\n", "df = df[cols]" ] },
{ "cell_type": "code", "execution_count": 249, "id": "e69d24dc", "metadata": {}, "outputs": [], "source": [ "#Importing Libraries\n", "import numpy as np # linear algebra\n", "import pandas as pd # data processing\n", "import matplotlib.pyplot as plt # plotting library\n", "from sklearn.model_selection import train_test_split,cross_val_score, cross_val_predict\n", "from sklearn import metrics\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn.tree import DecisionTreeRegressor\n", "from sklearn.ensemble import RandomForestRegressor" ] },
{ "cell_type": "markdown", "id": "e2ea08e5", "metadata": {}, "source": [ "#### Train/Test Data - Normalization" ] },
{ "cell_type": "code", "execution_count": 257, "id": "74e12f87", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "\n", "remove_columns = 
['season_id', 'resultEntered', 'reversible', 'reschedule', 'homeGoals', 'awayGoals',\n", " 'homeGoals2', 'awayGoals2', 'homeGoals3', 'awayGoals3', 'home', 'away', 'date', 'time', 'historic_season', 'id', 'homeTeam_id', 'awayTeam_id']\n", "# Keep the DataFrame's own column order: iterating a set of strings can give a\n", "# different order in every kernel session, which changes the feature order the\n", "# models (and the exported tree) see.\n", "feature_cols = [c for c in dict.fromkeys(df.columns[:-1]) if c not in remove_columns]\n", "# feature_cols = ['weekday','weekend','home_base','distance','winter_season']\n", "label = 'attendance'\n", "\n", "\n", "X = df[feature_cols] # Features\n", "y = df[label] # Target variable\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.3, random_state=1) # 70% training and 30% test" ] },
{ "cell_type": "markdown", "id": "94ade4b4", "metadata": {}, "source": [ "#### Decision Tree" ] },
{ "cell_type": "code", "execution_count": 183, "id": "4c9bdd0d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FITTING...done\n", "VISUALIZE\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 183, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pydotplus\n", "from io import StringIO\n", "from sklearn.tree import export_graphviz\n", "from sklearn.tree import DecisionTreeRegressor \n", "from sklearn.preprocessing import OneHotEncoder\n", "\n", "\n", "# Create the decision tree regressor (depth capped at 5 so the exported graph stays readable)\n", "regr = DecisionTreeRegressor(max_depth=5, random_state=1234)\n", "\n", "# Train the decision tree regressor\n", "print(\"FITTING...\", end=\"\")\n", "regr = regr.fit(X_train, y_train)\n", "print(\"done\")\n", "\n", "# Predict the response for test dataset\n", "y_pred = regr.predict(X_test)\n", "\n", "\n", "# %%\n", "\n", "\n", "# Export the fitted tree as Graphviz dot text and render it to attendance.png\n", "print(\"VISUALIZE\")\n", "dot_data = StringIO()\n", "export_graphviz(regr, out_file=dot_data,\n", " filled=True, rounded=True,\n", " special_characters=True, feature_names=feature_cols)\n", "graph = pydotplus.graph_from_dot_data(dot_data.getvalue())\n", "graph.write_png('attendance.png')\n", "# Image(graph.create_png())" ] },
{ "cell_type": "code", "execution_count": null, "id": "a3297f84", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "id": "c2e02abe", "metadata": {}, "source": [ "#### Other Models" ] },
{ "cell_type": "code", "execution_count": 193, "id": "3eeb8fa4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Suppport Vector Regression Accuracy: -0.026734414429896436\n", "R2 square: -0.026734414429896436\n", "MAE: 3585.035752797511\n", "MSE: 36159653.599150375\n" ] } ], "source": [ "from sklearn.svm import SVR\n", "from sklearn import metrics\n", "# RBF-kernel support vector regression fitted directly on the feature columns.\n", "# NOTE(review): no scaling is applied before this fit and SVR is sensitive to\n", "# feature scale - consider standardising X first.\n", "regressor= SVR(kernel='rbf')\n", "regressor.fit(X_train,y_train)\n", "y_pred_svm=regressor.predict(X_test)\n", "#y_pred_svm = cross_val_predict(regressor, x, y)\n", "mae=metrics.mean_absolute_error(y_test, y_pred_svm)\n", "mse=metrics.mean_squared_error(y_test, y_pred_svm)\n", "# Printing the metrics (regressor.score is the R^2 on the test set, so the first\n", "# two lines print the same number)\n", "print('Suppport Vector Regression Accuracy: ', regressor.score(X_test,y_test))\n", "print('R2 square:',metrics.r2_score(y_test, y_pred_svm))\n", "print('MAE: ', mae)\n", "print('MSE: ', mse)" ] },
{ "cell_type": "markdown", "id": "1899ba5a", "metadata": {}, "source": [ "#### Correlation Matrix" ] },
{ "cell_type": "code", "execution_count": 197, "id": "738f39ca", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 197, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "# GETTING Correllation matrix\n", "corr_mat=X_train.corr(method='pearson')\n", "plt.figure(figsize=(20,10))\n", "sns.heatmap(corr_mat,vmax=1,square=True,annot=True,cmap='cubehelix')" ] }, { "cell_type": "code", "execution_count": 198, "id": "38f78b1c", "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler\n", "\n", "X_Train=X_train.values\n", "X_Train=np.asarray(X_Train)\n", "\n", "# Finding normalised array of X_Train\n", "X_std=StandardScaler().fit_transform(X_Train)" ] }, { "cell_type": "code", "execution_count": 199, "id": "ab28ec86", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, 'Cumulative explained variance')" ] }, "execution_count": 199, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAArz0lEQVR4nO3deXxU1f3/8deHsAkICARlFWQVQRYjuNbd4oZ7Bdyr4AZqra36rdVqN61LtcpPBVxABVxaKyru4i5I2GVHZFcI+x5I8vn9MRc6TUPmJmRyZ5L38/GYR+beOXPveyLOJ/fec88xd0dERASgStQBREQkdagoiIjIHioKIiKyh4qCiIjsoaIgIiJ7VI06QEk1atTIW7VqFXUMEZG0Mnny5DXunpmoXdoVhVatWpGdnR11DBGRtGJmS8K00+kjERHZQ0VBRET2UFEQEZE9VBRERGQPFQUREdkjaUXBzJ4zs9Vm9t1eXjcz+4eZLTSzGWbWI1lZREQknGQeKbwA9C7m9TOAdsFjIPBUErOIiEgISSsK7v45sK6YJucCIz1mAlDfzJokK49IOvs+Zwv3vzWb/AINdS/JFeXNa82AZXHLy4N1PxZuaGYDiR1N0LJly3IJJ5IKVm/awWMfL+CVScuoWbUKFx7RjMOa1os6llRgaXFHs7sPBYYCZGVl6U8lqfA27djF0M8W8eyXP5BXUMDlRx3MoJPb0qhOjaijSQUXZVFYAbSIW24erBOptHLz8nnxmyUMGb+Q9dt20adrU359ensOblg76mhSSURZFMYCg8xsDNAL2Oju/3PqSKQyyC9w3py2gkc+mM+KDds5vl0j7ujdkc7NdKpIylfSioKZjQZOBBqZ2XLgXqAagLs/DYwDzgQWAtuAq5OVRSRVuTufzs/hwXfnMvenzXRuVpcHLuzC8e0SDmYpkhRJKwru3i/B6w7clKz9i6S6acs28MC7c5iwaB0tG9TiiX7dOatLE6pUsaijSSWWFheaRSqSRTlbePiDeYyb+RMNa1fn/nMPo++RLal
eVQMMSPRUFETKyepNO3j84wWMmbSMGlWrcOup7bj2+EOoU0P/G0rq0L9GkSTbvGMXQz9fxPAvfmBXfgGX9WrJoJPbkbm/updK6lFREEmS3Lx8XpqwlCc/WcD6bbs4p2tTblf3UklxKgoiZaygwHlz+goefj/WvfS4trHupV2aq3uppD4VBZEy4u58Nj+HB9+bx5wfN3FYU3UvlfSjoiBSBqYv28AD787lm0VradmgFv/o152z1b1U0pCKgsg+KNy99L4+h9Gvp7qXSvpSURAphcLdS285pR0DfqbupZL+9C9YpAQKdy+9tFdLBqt7qVQgKgoiIeTm5fPyhKU8OX4h67bu5OzDm3D76R1o1UjdS6ViUVEQKcbu7qWPfDCf5eu3c0ybhtx5RkcOb14/6mgiSaGiIFKEorqX/uX8LhzfrhFm6lEkFZeKgkgh8d1LWzTYj8f7duOcw5uqe6lUCioKIoEf1mzl4ffn8c7MH2lQuzp/OKcT/XsdrO6lUqmoKEilt3rzDv7x8QLGfLuM6lWrcPMp7RhwfGv2r1kt6mgi5U5FQSqtzTt2MezzRQwLupf269mSwae0pfH+NaOOJhIZFQWpdHLz8hk1cSlPfBLrXnpW0L20tbqXiiQuCmZWC/g10NLdB5hZO6CDu7+d9HQiZaigwBk7fSWPfDiPZevUvVSkKGGOFJ4HJgNHB8srgNcAFQVJG8vXb+O6Fycza+UmOjWpy8hfqnupSFHCFIU27n6JmfUDcPdtFvL/JDPrDTwOZADD3f2BQq8fDDwHZALrgMvcfXlJPoBIIj9t3MGlwyeybutOHrukG326qnupyN6E6Wu308z2AxzAzNoAuYneZGYZwBDgDKAT0M/MOhVq9jAw0t0PB+4H/lqC7CIJ5WzOpf/wCazZnMvIX/bkvO7NVBBEihGmKNwLvAe0MLOXgY+B34Z4X09gobsvcvedwBjg3EJtOgGfBM/HF/G6SKmt27qTy4ZP5McNO3j+6p50b3lA1JFEUl7CouDuHwIXAFcBo4Esd/80xLabAcvilpcH6+JND7YNcD6wv5k1LLwhMxtoZtlmlp2TkxNi11LZbdy+i8ufncgPa7cy/MoserZuEHUkkbSQsCiY2flAnru/E/Q4yjOz88po/7cDJ5jZVOAEYhex8ws3cveh7p7l7lmZmZraUIq3eccurnzuW+av2swzlx/BsW0bRR1JJG2EOn3k7ht3L7j7BmKnlBJZAbSIW24erNvD3Ve6+wXu3h34Xdz2RUpl2848fvnCJL5bsZEh/XtwUofGUUcSSSthikJRbcL0WpoEtDOz1mZWHegLjI1vYGaNzGz39u8i1hNJpFR27Mrn2hHZTF6ynsf6duP0ww6KOpJI2glTFLLN7FEzaxM8HiV230Kx3D0PGAS8D8wBXnX3WWZ2v5n1CZqdCMwzs/nAgcCfS/UppNLLzcvn+pcm882itTx8cVfOPrxp1JFE0pK5e/ENzGoDvwdODVZ9CPzJ3bcmOVuRsrKyPDs7O4pdS4ralV/AjS9P4cPZq/jrBV3o17Nl1JFEUo6ZTXb3rETtEp4GCr787yyTVCJlLC+/gFtfmcaHs1dxX5/DVBBE9lGYsY/aE+sl1Cq+vbufnLxYIokVFDi/fX0G78z4kf87syNXHtMq6kgiaS/MBePXgKeB4RTRXVQkCgUFzv+9MZN/TV3Br09rz8CftYk6kkiFEKYo5Ln7U0lPIhKSu3PfW7MYM2kZg05qy+BT2kUdSaTCCNP76C0zu9HMmphZg92PpCcTKYK789d35zLimyVce1xrfn16+6gjiVQoYY4Urgx+/iZunQOHlH0ckeL9/cP5DP18EZcfdTC/O+tQDX0tUsbC9D5qXR5BRBJ58pMF/OOThVyS1YL7+hymgiCSBKGm4zSzzsRGNN0zea27j0xWKJHChn+xiIc/mM/53Zvxlwu6aPhrkSQJ0yX1XmJ3HncCxhGbH+FLQEVBysXIbxbzp3fmcFaXJjx00eF
kqCCIJE2YC80XAacAP7n71UBXoF5SU4kEXpm0lHvenMWphx7IY327UTUjzD9ZESmtMP+HbXf3AmJDZtcFVvPfo5+KJMUbU5dz579mckL7TIZc2p1qKggiSRfmmkK2mdUHhhEbCG8L8E0yQ4m8M+NHfv3qdI5q3ZBnLj+CGlUzoo4kUimE6X10Y/D0aTN7D6jr7jOSG0sqsw9nr+KWMVPp0fIAhl+ZRc1qKggi5WWvRcHMOrr7XDPrUcRrPdx9SnKjSWX06bzV3PTyFA5rVo/nrz6S2jVCdZATkTJS3P9xtwEDgUeKeM0BDYgnZerrhWu47sXJtG1ch5FX92T/mtWijiRS6ey1KLj7wGBWtLvd/atyzCSV0KTF67hmRDYHN6zFS9f2ol4tFQSRKBTbnSPodfRkOWWRSmrasg1c/fwkmtSryUvX9qJB7epRRxKptML08fvYzC40jSkgSfDdio1c8exEGtSuzqgBR9F4/5qJ3yQiSROmKFxHbE6FXDPbZGabzWxTknNJJTDvp81c/uxE6tSoyqgBvTiongqCSNTCdEndvzyCSOXyfc4WLh0+kWoZVRg14CiaH1Ar6kgiQrgjBczsADPraWY/2/0I+b7eZjbPzBaa2f/M82xmLc1svJlNNbMZZnZmST+ApJ8la7fSf9gEwBk14ChaNaoddSQRCYQZEO9a4BagOTANOIrYHc3Fdkk1swxgCHAasByYZGZj3X12XLO7gVfd/Skz2z3gXquSfwxJFys2bKf/sInk5hUwZuBRtG1cJ+pIIhInzJHCLcCRwBJ3PwnoDmwI8b6ewEJ3X+TuO4ExwLmF2jhQN3heD1gZJrSkp1WbdtB/2AQ27djFS9f0ouNBdRO/SUTKVZiisMPddwCYWQ13nwt0CPG+ZsCyuOXlwbp4fwAuM7PlxI4SBhe1ITMbaGbZZpadk5MTYteSanI259J/2ATWbM5lxC970rmZBtoVSUVhisLyYEC8fwMfmtmbwJIy2n8/4AV3bw6cCbwY3DD3X9x9qLtnuXtWZmZmGe1aysv6rTu5/NmJrNywg+ev7kmPlgdEHUlE9iJM76Pzg6d/MLPxxE7zvBdi2yv47yG2mwfr4l0D9A72842Z1QQaERueWyqAjdt3cflzE1m0ZivPX3UkPVs3iDqSiBQj4ZGCmf3DzI4BcPfP3H1scI0gkUlAOzNrbWbVgb7A2EJtlhKbwAczO5TYdJ86P1RBbMnN48rnvmXeT5t55rIjOLZto6gjiUgCYU4fTQbuNrPvzexhM8sKs2F3zwMGAe8Dc4j1MpplZvebWZ+g2a+BAWY2HRgNXOXuXvKPIalm2848fvn8JGau2MiT/XtwUsfGUUcSkRAs7HewmTUALiT2F39Ld2+XzGB7k5WV5dnZ2VHsWkLasSufa0ZM4pvv1/J43+6c07Vp1JFEKj0zm+zuCf+oL8n8hm2BjsDBwNzSBpOKLTcvnxtemszX36/loYu6qiCIpJkw1xT+ZmYLgPuBmUCWu5+T9GSSdnblFzB41FTGz8vhz+d14cIjmkcdSURKKMy0Vt8DR7v7mmSHkfSVX+D86pVpfDB7FX84pxP9e7WMOpKIlEKYLqnPlEcQSV8FBc5vXp/O2zN+5K4zOnLVsa2jjiQipVSSawoi/8Pd+d2/Z/KvKSu47bT2XHdCm6gjicg+UFGQUnN37ntrNqO/XcZNJ7Vh8Mlto44kIvtor6ePgi6oe+Xu68o+jqQLd+eBd+fywteLuea41tx+egc0OZ9I+ivumsJkYqOYGtASWB88r0/sTmSdOK7E/v7RAp75fBGXHdWSu886VAVBpILY6+kjd2/t7ocAHwHnuHsjd28InA18UF4BJfUMGb+Qf3y8gF9kNef+Pp1VEEQqkDDXFI5y93G7F9z9XeCY5EWSVDb8i0U89P48zuvWlL9ecDhVqqggiFQkYe5TWGlmdwMvBcuXoslwKqUXJyzhT+/M4cwuB/HwxV3JUEEQqXDCHCn0AzKBN4B
/Bc/7JTOUpJ4PZv3EPW9+x6mHNuaxS7pTNUMd10QqojA3r60DbjGz2u6+tRwySYqZ8+Mmbn1lGl2a1ePJ/j2oXlUFQaSiCjP20TFmNpvY8NeYWVcz+39JTyYpYe2WXK4dkc3+Nasy7IosalbLiDqSiCRRmD/5/g78HFgL4O7TgZ8lM5Skhp15BVz/0mTWbMll6OVZHFi3ZtSRRCTJQp0HcPdlhVblJyGLpBB35+5/z2TS4vU8dHFXuraoH3UkESkHYXofLQum43QzqwbcQnAqSSqu575azKvZyxl8clv6aE4EkUojzJHC9cBNQDNgBdAtWJYK6tN5q/nzO7P5+WEH8qtT20cdR0TKUZjeR2uI3ZsglcDC1VsYPGoqHQ6qy6O/6Kab00QqmYRFwcwygQFAq/j27v7L5MWSKGzYtpNrR0yiRrUqDLviCGrXCHN2UUQqkjD/178JfEFsDKQSXWA2s97A40AGMNzdHyj0+t+Bk4LFWkBjd69fkn1I2diVX8BNo6awcsMORg/sRfMDakUdSUQiEKYo1HL3O0q6YTPLAIYApwHLgUlmNtbdZ+9u4+6/ims/GOhe0v1I2fjj27P5auFaHrrocI44uNhR00WkAgtzofltMzuzFNvuCSx090XuvhMYA5xbTPt+wOhS7Ef20YsTljDymyUM/NkhXJzVIuo4IhKhMEXhFmKFYbuZbTKzzWa2KcT7mgHx9zcsD9b9DzM7mNj8DJ/s5fWBZpZtZtk5OTkhdi1hfb1wDX8YO4uTOmRyR++OUccRkYglLAruvr+7V3H3/dy9brBct4xz9AVed/cir1m4+1B3z3L3rMzMzDLedeW1eM1Wbnh5Cq0b1ebxft016qmIFDsdZ0d3n2tmPYp63d2nJNj2CiD+XETzYF1R+qJ7H8rVph27uHZkNmYw/Ios6tasFnUkEUkBxV1ovg0YCDxSxGsOnJxg25OAdmbWmlgx6Av0L9zIzDoCBwDfhAks+y6/wLl59FQWr9nKyGt60qpR7agjiUiK2GtRcPeBwc+T9tamOO6eZ2aDgPeJdUl9zt1nmdn9QLa7jw2a9gXGuLuXZj9Scg++N5dP5+Xwp/M6c0ybRlHHEZEUEuruJDPrDHQC9gyT6e4jE70vmMZzXKF19xRa/kOYDFI2XstextDPF3HF0Qdz2VEHRx1HRFJMmDua7wVOJFYUxgFnAF8CCYuCpJbsxev43RvfcWzbhvz+7E5RxxGRFBSmS+pFwCnAT+5+NdAVqJfUVFLmlq/fxvUvTaZp/ZoM6d+DappOU0SKEOb00XZ3LzCzPDOrC6zmv3sVSYrbmpvHgJGTyc0rYMzAI6lfq3rUkUQkRYUpCtlmVh8YBkwGtqCeQmmjoMC57dVpzPtpE89ddSRtG9eJOpKIpLAwQ2ffGDx92szeA+q6+4zkxpKy8veP5vP+rFX8/uxOnNihcdRxRCTFFXfzWpE3re1+LcTNaxKxN6et4IlPFnJJVgt+eWyrqOOISBoo7kihqJvWdgtz85pEaPqyDfz29Rn0bNWAP57XGTMNYSEiiRV381qpblqT6P20cQcDRmbTqE4NnrqsB9WrqqeRiIQT5j6FmsCNwHHEjhC+AJ529x1JzialsGNXPgNfzGZrbh7/vPEYGtapEXUkEUkjYXofjQQ2A08Ey/2BF4GLkxVKSsfd+c3rM5i5YiNDL8+i40FlPZitiFR0YYpCZ3ePv/11vJnN3mtricyQ8Qt5a/pKftu7A6d1OjDqOCKShsKcbJ5iZkftXjCzXkB28iJJabz33U88/MF8zuvWlBtOaBN1HBFJU2GOFI4AvjazpcFyS2Cemc0E3N0PT1o6CWXWyo386pVpdG1RnwcuPFw9jUSk1MIUhd5JTyGllrM5lwEjsqm3XzWGXX4ENatlRB1JRNJYmKLQzt0/il9hZle6+4gkZZKQcvPyuf6lyazbtpPXrjuGxnVrJn6TiEgxwlxTuMfMnjKz2mZ2oJm9BZyT7GBSPHfnd29
8x+Ql63nk4m50aa6Ba0Vk34UpCicA3wPTiM2jMMrdL0pmKEls+Bc/8Prk5dxySjvOOrxJ1HFEpIIIUxQOAHoSKwy5wMGmK5mRGj93NX95dw5ndjmIW05pF3UcEalAwhSFCcB77t4bOBJoCnyV1FSyVwtWbWbw6Kl0alKXhy/uSpUqqs8iUnbCXGg+1d2XArj7duBmM/tZcmNJUdZv3ck1I7KpWS2DYVdkUat6qCm2RURCC3OksMbMfm9mwwDMrB0QavwEM+ttZvPMbKGZ3bmXNr8ws9lmNsvMRoWPXrnsyi/ghpcn89OmHQy94gia1t8v6kgiUgGF+VPzeWIzrh0dLK8AXgPeLu5NZpYBDAFOA5YDk8xsrLvPjmvTDrgLONbd15uZZoEpgrtz79hZTFi0jkd/0ZUeLQ+IOpKIVFBhjhTauPvfgF0A7r4NCHMiuyew0N0XuftOYAxwbqE2A4Ah7r4+2Pbq0MkrkRcnLGHUxKVcf0IbLujRPOo4IlKBhSkKO81sP2LDZmNmbYj1QkqkGbAsbnl5sC5ee6C9mX1lZhPMrMi7p81soJllm1l2Tk5OiF1XHF8uWMN9b83m1EMb85ufd4g6johUcGGKwr3Ae0ALM3sZ+Bj4bRntvyrQDjgR6AcMM7P6hRu5+1B3z3L3rMzMzDLader7Yc1Wbnx5Mm0z6/BY3+5kqKeRiCRZwmsK7v6hmU0BjiJ22ugWd18TYtsrgBZxy82DdfGWAxPdfRfwg5nNJ1YkJoUJX5Ft3L6La0ZMIqOKMfzKLOrUUE8jEUm+UPM0uvtad3/H3d8OWRAg9sXezsxam1l1oC8wtlCbfxM7SsDMGhE7nbQo5PYrrLz8AgaPnsrStdt4+rIjaNGgVtSRRKSSSNrkve6eBwwC3gfmAK+6+ywzu9/M+gTN3gfWBpP2jAd+4+5rk5UpXfxl3Fw+n5/Dn87rTK9DGkYdR0QqkaSek3D3ccC4QuvuiXvuwG3BQ4Ax3y7lua9+4OpjW9G3Z8uo44hIJRPqSMHMjjOzq4PnmWbWOrmxKqeJi9by+ze/4/h2jfjdmYdGHUdEKqGERcHM7gXuIHaTGUA14KVkhqqMlq3bxg0vT6HFAbV4sn8PqmYk7cyeiMhehfnmOR/oA2wFcPeVwP7JDFXZbMnN49oR2eTlFzD8yizq7Vct6kgiUkmFunktOPe/++a12smNVLnkFzi3jpnKwpwtDLm0B4dk1ok6kohUYmGKwqtm9gxQ38wGAB8Bw5Ibq/J4+IN5fDRnNfec3Ynj21WeG/NEJDWFuXntYTM7DdgEdADucfcPk56sEnhj6nKe+vR7+vdqyRVHHxx1HBGRxEXBzG4DXlEhKFtTlq7njn/O5KhDGnBfn8PQZHYikgrCnD7aH/jAzL4ws0FmdmCyQ1V0KzdsZ+DIyRxUtyZPXXoE1dTTSERSRMJvI3e/z90PA24CmgCfmdlHSU9WQW3bmceAkdns2JXP8CuzOKB29agjiYjsUZI7mlcDPwFrAU2GUwoFBc7tr01n9o+bePbKLNofqJ69IpJawty8dqOZfUpsyOyGwAB3PzzZwSqiJz5ZyLiZP3HXGR05uaPOwolI6glzpNACuNXdpyU5S4X25YI1PPbxfC7o3owBxx8SdRwRkSLttSiYWV133wQ8FCw3iH/d3dclOVuFkbM5l1tfmUabzDr86fzO6mkkIimruCOFUcDZwGRidzPHf5M5oD93QygocG57dRqbd+zi5Wt7Uau6JssRkdS1128odz87+KkRUffBU599zxcL1vCX87vQ4SBdWBaR1BbmQvPHYdbJ/8pevI5HP5zP2Yc3oV/PFonfICISseKuKdQEagGNzOwA/nP6qC7QrByypbUN23Zy8+ipNKu/H3+9oIuuI4hIWijuBPd1wK1AU2LXFXZ/q20CnkxurPTm7tz+2gxytuTyzxuOYf+aGgpbRNJDcdcUHgceN7PB7v5EOWZKey98vZiP5qzi7rMO5fDm9aOOIyI
SWphhLp4ws85m9gszu2L3I8zGzay3mc0zs4VmdmcRr19lZjlmNi14XFuaD5FKvluxkb+Om8spHRtzzXG6Ri8i6SXMKKn3AicCnYBxwBnAl8DIBO/LAIYApwHLgUlmNtbdZxdq+oq7Dyp59NSzeccuBo2aQsM61Xn44q66jiAiaSfM8JwXAacAP7n71UBXoF6I9/UEFrr7InffCYwBzi110hTn7vzuje9Yum4bj/ftroHuRCQthSkK2929AMgzs7rEBsYL07+yGbAsbnk5RfdautDMZpjZ62aWtv02X81extjpK/nVqe3p2bpB4jeIiKSgMEUh28zqE5uCczIwBfimjPb/FtAqGGDvQ2BEUY3MbKCZZZtZdk5OThntuuzMX7WZe8fO4pg2DbnxpLZRxxERKbUw03HeGDx92szeA+q6+4wQ217Bfx9RNA/WxW97bdzicOBve8kwFBgKkJWV5SH2XW6278xn0Kgp1K5elccu6UZGFV1HEJH0VdzNaz2Ke83dpyTY9iSgnZm1JlYM+gL9C22nibv/GCz2AeaESp1C7n97FvNXbWHEL3vSuG7NqOOIiOyT4o4UHinmNQdOLm7D7p5nZoOA94EM4Dl3n2Vm9wPZ7j4WuNnM+gB5wDrgqpKEj9rY6SsZ/e0ybjixDSe0z4w6jojIPjP3lDobk1BWVpZnZ2dHHYPFa7Zy9hNf0v7AOrxy3dGaZ1lEUpqZTXb3rETtwtynUOSNau5e7H0KFVluXj6DR0+lisE/+nVXQRCRCiPM4P5Hxj2vSeyehSkkuHmtInvw3XnMXLGRZy4/guYH1Io6johImQnT+2hw/HLQPXVMsgKluo9mr+K5r37gyqMP5ueHHRR1HBGRMlWa8x5bgUo5qM/KDdu5/fXpHNa0LnedeWjUcUREylyYawpvEettBLEi0gl4NZmhUlFefgE3j57KrrwCnuzfg5rVMqKOJCJS5sJcU3g47nkesMTdlycpT8p67KMFZC9Zz2OXdKN1o9pRxxERSYow1xQ+AwjGPaoaPG/g7uuSnC1lfLlgDUM+XcjFRzTnvO6adE5EKq4wp48GAvcDO4ACYjOwOXBIcqOlhpzNudz6yjTaZNbhvnMPizqOiEhShTl99Bugs7uvSXaYVFNQ4Nz26jQ279jFS9f2pFb1ML8uEZH0Fab30ffAtmQHSUVPf/49XyxYw73nHEbHg+pGHUdEJOnC/Ol7F/C1mU0EcnevdPebk5YqBWQvXscjH8znrMOb0K9n2k7zICJSImGKwjPAJ8BMYtcUKrwN23Zy8+ipNKu/H3+9oIum1RSRSiNMUajm7rclPUmKcHd++/oMcrbk8vr1x1C3ZrWoI4mIlJsw1xTeDWY+a2JmDXY/kp4sIiO+XswHs1dxR++OdG1RP+o4IiLlKsyRQr/g511x6ypkl9TvVmzkL+PmckrHxlxzXKUcyUNEKrkwN69Vim/HLbl5DBo1hQa1q/PQxV11HUFEKiXNp0DsOsLv3pjJ0nXbGDPwaBrUrh51JBGRSGg+BeC17OW8OW0lt53Wnp6tK+zlEhGRhCr9fAoLVm3mnrHfcUybhtx0Utuo44iIRKpSz6ewY1c+g0ZNpXb1qjx2STcyqug6gohUbgmLgpm9ZWZjg8fbwDzgjTAbN7PeZjbPzBaa2Z3FtLvQzNzMEk4qXZbue2s281Zt5tFLutG4bs3y3LWISEpK2nwKZpYBDAFOA5YDk8xsrLvPLtRuf+AWYGLo1GXgrekrGf3tUq4/oQ0ntM8sz12LiKSsvRYFM2sLHLh7PoW49ceaWQ13/z7BtnsCC919UfC+McC5wOxC7f4IPEhsNNZysWTtVu7610x6tKzPr09vX167FRFJecWdPnoM2FTE+k3Ba4k0A5bFLS8P1u1hZj2AFu7+TnEbCu6ozjaz7JycnBC73rudeQUMHj2VKgb/6NedahmluawiIlIxFfeNeKC7zyy8MljXal93bGZVgEeBXydq6+5D3T3L3bMyM/f
tVM+D781lxvKN/O2irjQ/oNY+bUtEpKIprijUL+a1/UJsewUQP+Z082DdbvsDnYFPzWwxcBQwNpkXmz+avYpnv/yBK48+mN6dD0rWbkRE0lZxRSHbzAYUXmlm1wKTQ2x7EtDOzFqbWXWgLzB294vuvtHdG7l7K3dvBUwA+rh7dok+QUgrN2zn9ten06lJXe4689Bk7EJEJO0V1/voVuANM7uU/xSBLKA6cH6iDbt7npkNAt4HMoDn3H2Wmd0PZLv72OK3UHby8gu4ZcxUduYV8GT/7tSsllFeuxYRSSt7LQruvgo4xsxOInaaB+Add/8k7MbdfRwwrtC6e/bS9sSw2y2pxz9ewKTF6/n7JV05JLNOsnYjIpL2wgxzMR4YXw5ZkuKrhWt4cvxCLj6iOed3bx51HBGRlFah+2PmbM7l1lem0SazDvede1jUcUREUl6YO5rTUkGBc9ur09i0fRcvXtOTWtUr7EcVESkzFfZI4enPv+eLBWu455xOdDyobtRxRETSQoUsCpOXrOORD+ZzVpcm9O/ZMuo4IiJpo8IVhQ3bdnLz6Gk0rV+Tv17YRdNqioiUQIU60e7u/Pb1GazevIPXrz+GujWrRR1JRCStVKgjhZHfLOGD2au4o3dHuraoH3UcEZG0U2GKwncrNvLnd+ZwcsfGXHNchZgYTkSk3FWIorAlN49Bo6bQoHZ1Hr64q64jiIiUUtpfU3B37n5jJkvXbWP0gKNoULt61JFERNJW2h8pvDZ5Of+etpJbT21Pr0MaRh1HRCStpXVRWLh6M/e+OYtj2jTkppPaRh1HRCTtpW1R2LErn5tenkqt6hk8dkk3MqroOoKIyL5K22sK9701m3mrNvPC1UfSuG7NqOOIiFQIaXmk8PaMlYz+dinXnXAIJ3ZoHHUcEZEKI+2Kws68Au7650y6t6zP7ad3iDqOiEiFknZFYem6bZjBE/26Uy0j7eKLiKS0tPtW3b4rn79ddDjND6gVdRQRkQon7YpCozo16N25SdQxREQqpKQWBTPrbWbzzGyhmd1ZxOvXm9lMM5tmZl+aWadE22xSTz2NRESSJWlFwcwygCHAGUAnoF8RX/qj3L2Lu3cD/gY8mqw8IiKSWDKPFHoCC919kbvvBMYA58Y3cPdNcYu1AU9iHhERSSCZN681A5bFLS8HehVuZGY3AbcB1YGTi9qQmQ0EBgK0bKnpNUVEkiXyC83uPsTd2wB3AHfvpc1Qd89y96zMzMzyDSgiUokksyisAFrELTcP1u3NGOC8JOYREZEEklkUJgHtzKy1mVUH+gJj4xuYWbu4xbOABUnMIyIiCSTtmoK755nZIOB9IAN4zt1nmdn9QLa7jwUGmdmpwC5gPXBlsvKIiEhiSR0l1d3HAeMKrbsn7vktydy/iIiUjLmnVy9QM9sMzIs6xz5oBKyJOsQ+UP7opHN2UP6odXD3/RM1Ssf5FOa5e1bUIUrLzLKVPzrpnD+ds4PyR83MssO0i7xLqoiIpA4VBRER2SMdi8LQqAPsI+WPVjrnT+fsoPxRC5U/7S40i4hI8qTjkYKIiCSJioKIiOyRVkUh0aQ9qczMnjOz1Wb2XdRZSsrMWpjZeDObbWazzCytbjo0s5pm9q2ZTQ/y3xd1ptIwswwzm2pmb0edpaTMbHHchFqhukamEjOrb2avm9lcM5tjZkdHnSkMM+sQ/M53PzaZ2a3FviddrikEk/bMB04jNgz3JKCfu8+ONFhIZvYzYAsw0t07R52nJMysCdDE3aeY2f7AZOC8NPrdG1Db3beYWTXgS+AWd58QcbQSMbPbgCygrrufHXWekjCzxUCWu6flzV9mNgL4wt2HB2O51XL3DRHHKpHgO3QF0Mvdl+ytXTodKSSctCeVufvnwLqoc5SGu//o7lOC55uBOcTmy0gLHrMlWKwWPNLjr6GAmTUnNmjk8KizVDZmVg/4GfAsgLvvTLeCEDgF+L64ggDpVRSKmrQnbb6YKgozawV0ByZGHKV
EglMv04DVwIfunlb5gceA3wIFEecoLQc+MLPJwaRZ6aQ1kAM8H5y+G25mtaMOVQp9gdGJGqVTUZCImVkd4J/ArYWmUk157p4fzAXeHOhpZmlzCs/MzgZWu/vkqLPsg+PcvQexOdtvCk6npouqQA/gKXfvDmwF0u2aZnWgD/BaorbpVBRKOmmPlKHgXPw/gZfd/V9R5ymt4LB/PNA74iglcSzQJzgvPwY42cxeijZSybj7iuDnauANYqeD08VyYHnc0eXrxIpEOjkDmOLuqxI1TKeikHDSHkmO4ELts8Acd3806jwlZWaZZlY/eL4fsc4KcyMNVQLufpe7N3f3VsT+3X/i7pdFHCs0M6sddFAgOO1yOpA2vfDc/SdgmZl1CFadAqRFJ4s4/Qhx6gjSaJTUvU3aE3Gs0MxsNHAi0MjMlgP3uvuz0aYK7VjgcmBmcF4e4P+C+TLSQRNgRND7ogrwqrunXbfONHYg8EbsbwuqAqPc/b1oI5XYYODl4A/SRcDVEecJLSjEpwHXhWqfLl1SRUQk+dLp9JGIiCSZioKIiOyhoiAiInuoKIiIyB4qCiIisoeKgpQLM3MzeyRu+XYz+0MZbfsFM7uoLLaVYD8XByNkjk/2vqJmZv8XdQaJhoqClJdc4AIzaxR1kHhmVpJ7da4BBrj7ScnKk0JUFCopFQUpL3nE5oj9VeEXCv+lb2Zbgp8nmtlnZvammS0yswfM7NJgboSZZtYmbjOnmlm2mc0PxgraPQjeQ2Y2ycxmmNl1cdv9wszGUsSdqWbWL9j+d2b2YLDuHuA44Fkze6iI99wRvGe6mT0QrOtmZhOCfb9hZgcE6z81s78HeeeY2ZFm9i8zW2BmfwratArG7n85aPO6mdUKXjslGJhtpsXm6agRrF9sZveZ2ZTgtY7B+tpBu2+D950brL8q2O97wb7/Fqx/ANjPYuPvvxy8/53gs31nZpeU4L+7pBt310OPpD+IzSVRF1gM1ANuB/4QvPYCcFF82+DnicAGYnck1yA21tV9wWu3AI/Fvf89Yn/ktCM2Vk1NYCBwd9CmBpBNbMTLE4kNata6iJxNgaVAJrG7bz8hNncEwKfE5gQo/J4zgK+JjbEP0CD4OQM4IXh+f1zeT4EH4z7HyrjPuBxoCLQiNrLosUG754LfWU1iowW3D9aPJDZAIcHvdnDw/EZgePD8L8BlwfP6xOYlqQ1cRezu3HrBdpcALeL/GwTPLwSGxS3Xi/rfkx7Je+hIQcqNx0ZWHQncXIK3TfLYfA65wPfAB8H6mcS+OHd71d0L3H0BsS+6jsTG2LkiGJpjIrEv23ZB+2/d/Yci9nck8Km757h7HvAysbH0i3Mq8Ly7bws+5zqLjcFf390/C9qMKLSd3eN2zQRmxX3GRfxn4Mdl7v5V8PwlYkcqHYAf3H3+Xra7e7DCyfzn93M6cGfwe/iUWAFoGbz2sbtvdPcdxI6aDi7i880ETjOzB83seHffmOD3IWksbcY+kgrjMWAK8HzcujyCU5lmVgWoHvdabtzzgrjlAv7732/h8VocMGJ/Ob8f/4KZnUjsSCFK8Z+j8Gfc/bmK+kxht5sftx0DLnT3efENzaxXoX3Hv+c/O3Wfb2Y9gDOBP5nZx+5+f4gskoZ0pCDlyt3XAa8Su2i722LgiOB5H2Izo5XUxWZWJbjOcAgwj9jgiTdYbNhvzKy9JZ4c5VvgBDNrFAyg1w/4LMF7PgSujjvn3yD4a3q9mR0ftLk8xHYKa2n/mQu4P7FpROcBrcysbQm2+z4w2IIR6cyse4h974r7vTUFtrn7S8BDpN+w0VICOlKQKDwCDIpbHga8aWbTiV0bKM1f8UuJfaHXBa539x1mNpzYKZQpwRdiDnBecRtx9x/N7E5icy4Y8I67v5ngPe+ZWTcg28x2AuOI9d65Eng6KBalGVlzHrEJaZ4jdmrnqeBzXQ28FvScmgQ8nWA7fyR2hDYjOBL7AUg0x/PQoP0UYqf8HjKzAmAXcEM
JP4ekEY2SKpKCLDbt6dvunjYzxEnFoNNHIiKyh44URERkDx0piIjIHioKIiKyh4qCiIjsoaIgIiJ7qCiIiMge/x+r4acrVVRpnwAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from sklearn.decomposition import PCA\n", "pca = PCA().fit(X_std)\n", "plt.plot(np.cumsum(pca.explained_variance_ratio_))\n", "plt.xlim(0,7,1)\n", "plt.xlabel('Number of components')\n", "plt.ylabel('Cumulative explained variance')" ] }, { "cell_type": "code", "execution_count": 200, "id": "b40d92f1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/md/miniconda3/envs/leagues/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", " FutureWarning\n", "/home/md/miniconda3/envs/leagues/lib/python3.7/site-packages/seaborn/distributions.py:1718: UserWarning: `shade_lowest` is now deprecated in favor of `thresh`. Setting `thresh=0.05`, but please update your code.\n", " warnings.warn(msg, UserWarning)\n", "/home/md/miniconda3/envs/leagues/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", " FutureWarning\n", "/home/md/miniconda3/envs/leagues/lib/python3.7/site-packages/seaborn/distributions.py:1718: UserWarning: `shade_lowest` is now deprecated in favor of `thresh`. Setting `thresh=0.05`, but please update your code.\n", " warnings.warn(msg, UserWarning)\n", "/home/md/miniconda3/envs/leagues/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: y. 
From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", " FutureWarning\n", "/home/md/miniconda3/envs/leagues/lib/python3.7/site-packages/seaborn/distributions.py:1718: UserWarning: `shade_lowest` is now deprecated in favor of `thresh`. Setting `thresh=0.05`, but please update your code.\n", " warnings.warn(msg, UserWarning)\n" ] }, { "data": { "text/plain": [ "(-2.0, 2.0)" ] }, "execution_count": 200, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.decomposition import PCA\n", "# Project the standardised training features onto their first five principal components.\n", "sklearn_pca=PCA(n_components=5)\n", "X_Train=sklearn_pca.fit_transform(X_std)\n", "\n", "sns.set(style='darkgrid')\n", "f, ax = plt.subplots(figsize=(8, 8))\n", "# ax.set_aspect('equal')\n", "# x/y are passed by keyword and fill/thresh replace the deprecated shade/shade_lowest;\n", "# thresh=0.05 is the value seaborn itself substituted for shade_lowest=False.\n", "ax = sns.kdeplot(x=X_Train[:,0], y=X_Train[:,1], cmap=\"Greens\",\n", " fill=True, thresh=0.05)\n", "ax = sns.kdeplot(x=X_Train[:,1], y=X_Train[:,2], cmap=\"Reds\",\n", " fill=True, thresh=0.05)\n", "ax = sns.kdeplot(x=X_Train[:,2], y=X_Train[:,3], cmap=\"Blues\",\n", " fill=True, thresh=0.05)\n", "red = sns.color_palette(\"Reds\")[-2]\n", "blue = sns.color_palette(\"Blues\")[-2]\n", "green = sns.color_palette(\"Greens\")[-2]\n", "# Each label names the pair of components that is drawn in that colour above.\n", "ax.text(0.5, 0.5, \"3rd and 4th Projection\", size=12, color=blue)\n", "ax.text(-4, 0.0, \"2nd and 3rd Projection\", size=12, color=red)\n", "ax.text(2, 0, \"1st and 2nd Projection\", size=12, color=green)\n", "plt.xlim(-6,5)\n", "plt.ylim(-2,2)" ] },
{ "cell_type": "code", "execution_count": 202, "id": "aa1bd2a4", "metadata": {}, "outputs": [], "source": [ "# X_Train (the PCA scores) has one row per row of X_train, in the same order, so the\n", "# matching targets are the entries of y at X_train's row labels. Taking them from y\n", "# (not from y_train) also keeps this cell re-runnable, since y_train is reassigned below.\n", "y_pca = y.loc[X_train.index]\n", "number_of_samples = len(y_pca)\n", "np.random.seed(0)\n", "random_indices = np.random.permutation(number_of_samples)\n", "num_training_samples = int(number_of_samples*0.75)\n", "# 75% / 25% split of the PCA-transformed training rows; features and targets are both\n", "# selected by the same positions.\n", "x_train = X_Train[random_indices[:num_training_samples]]\n", "y_train=y_pca.iloc[random_indices[:num_training_samples]]\n", "x_test=X_Train[random_indices[num_training_samples:]]\n", "y_test=y_pca.iloc[random_indices[num_training_samples:]]\n", "y_Train=list(y_train)" ] },
{ "cell_type": "code", "execution_count": 204, "id": "2acb04aa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train error = 98.44699684022491 percent in Ridge Regression\n", "Test error = 99.98330172684955 percent in Ridge Regression\n" ] } ], "source": [ "from sklearn import linear_model\n", "# Ridge regression on the PCA scores of the training subset\n", "model=linear_model.Ridge()\n", "model.fit(x_train,y_train)\n", "y_predict=model.predict(x_train)\n", "\n", "# mean absolute percentage error on the training subset\n", "error=0\n", "for i in range(len(y_Train)):\n", " error+=(abs(y_Train[i]-y_predict[i])/y_Train[i])\n", "train_error_ridge=error/len(y_Train)*100\n", "print(\"Train error = \"'{}'.format(train_error_ridge)+\" percent in Ridge Regression\")\n", "\n", "# NOTE: the names are swapped relative to their content - Y_test holds the Ridge\n", "# predictions for x_test and y_Predict holds the true targets.\n", "Y_test=model.predict(x_test)\n", "y_Predict=list(y_test)\n", "\n", "error=0\n", "for i in range(len(y_test)):\n", " error+=(abs(y_Predict[i]-Y_test[i])/y_Predict[i])\n", "test_error_ridge=error/len(Y_test)*100\n", "print(\"Test error = \"'{}'.format(test_error_ridge)+\" percent in Ridge Regression\")" ] },
{ "cell_type": "code", "execution_count": 206, "id": "10001f77", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Residual plot in Ridge Regression')" ] }, "execution_count": 206, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib\n", "matplotlib.rcParams['figure.figsize'] = (6.0, 6.0)\n", "\n", "preds = pd.DataFrame({\"preds\":model.predict(x_train), \"true\":y_train})\n", "preds[\"residuals\"] = preds[\"true\"] - preds[\"preds\"]\n", "preds.plot(x = \"preds\", y = \"residuals\",kind = \"scatter\")\n", "plt.title(\"Residual plot in Ridge Regression\")" ] }, { "cell_type": "code", "execution_count": 208, "id": "aef6ee00", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train error = 93.27939871725515 percent in Knn algorithm\n", "Test error = 34.00485323676349 percent in knn algorithm\n" ] } ], "source": [ "from sklearn import neighbors\n", "n_neighbors=5\n", "knn=neighbors.KNeighborsRegressor(n_neighbors,weights='uniform')\n", "knn.fit(x_train,y_train)\n", "y1_knn=knn.predict(x_train)\n", "y1_knn=list(y1_knn)\n", "\n", "error=0\n", "for i in range(len(y_train)):\n", " error+=(abs(y1_knn[i]-y_Train[i])/y_Train[i])\n", "train_error_knn=error/len(y_Train)*100\n", "print(\"Train error = \"+'{}'.format(train_error_knn)+\" percent\"+\" in Knn algorithm\")\n", "\n", "y2_knn=knn.predict(x_test)\n", "y2_knn=list(y2_knn)\n", "error=0\n", "for i in range(len(y_test)):\n", " error+=(abs(y2_knn[i]-Y_test[i])/Y_test[i])\n", "test_error_knn=error/len(Y_test)*100\n", "print(\"Test error = \"'{}'.format(test_error_knn)+\" percent\"+\" in knn algorithm\")" ] }, { "cell_type": "code", "execution_count": 209, "id": "d141476e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train error = 98.42901599605693 percent in Bayesian Regression\n", "Test error = 2.6273348309846623 percent in Bayesian Regression\n" ] } ], "source": [ "reg = linear_model.BayesianRidge()\n", "reg.fit(x_train,y_train)\n", "y1_reg=reg.predict(x_train)\n", "y1_reg=list(y1_reg)\n", "y2_reg=reg.predict(x_test)\n", "y2_reg=list(y2_reg)\n", "\n", "error=0\n", "for i in 
range(len(y_train)):\n",
    " error+=(abs(y1_reg[i]-y_Train[i])/y_Train[i])\n",
    "train_error_bay=error/len(y_Train)*100\n",
    "print(\"Train error = \"+'{}'.format(train_error_bay)+\" percent\"+\" in Bayesian Regression\")\n",
    "\n",
    "error=0\n",
    "for i in range(len(y_test)):\n",
    " error+=(abs(y2_reg[i]-Y_test[i])/Y_test[i])\n",
    "test_error_bay=(error/len(Y_test))*100\n",
    "print(\"Test error = \"+'{}'.format(test_error_bay)+\" percent\"+\" in Bayesian Regression\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc4ef924",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeRegressor\n",
    "\n",
    "# Depth-limited tree (max_depth=5) with a fixed random_state.\n",
    "dec = DecisionTreeRegressor(max_depth=5, random_state=1234)\n",
    "dec.fit(X_train, y_train)\n",
    "y1_dec = list(dec.predict(X_train))  # predictions for the training rows\n",
    "y2_dec = list(dec.predict(X_test))   # predictions for the test rows\n",
    "\n",
    "# Mean absolute percentage error on the training split.\n",
    "error = 0\n",
    "for i, v in enumerate(y_train):\n",
    "    error += abs(y1_dec[i] - v) / v\n",
    "train_error_tree = error / len(y_train) * 100\n",
    "print(\"Train error = \"+'{}'.format(train_error_tree)+\" percent\"+\" in Decision Tree Regressor\")\n",
    "\n",
    "# Same metric on the test split. The test targets have to be paired with the\n",
    "# test predictions (y2_dec); pairing them with y1_dec compares unrelated rows.\n",
    "error = 0\n",
    "for i, v in enumerate(y_test):\n",
    "    error += abs(y2_dec[i] - v) / v\n",
    "test_error_tree = error / len(y_test) * 100\n",
    "print(\"Test error = \"'{}'.format(test_error_tree)+\" percent in Decision Tree Regressor\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "id": "f8371c65",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mutiple Linear Regression Accuracy: 0.3760362035902819\n",
      "Cross-Predicted(KFold) Mutiple Linear Regression Accuracy: 0.26323368310834494\n"
     ]
    }
   ],
   "source": [
    "lin_reg = LinearRegression()\n",
    "lin_reg.fit(X_train,y_train)\n",
    "\n",
    "# Predict the target for the test rows\n",
    "y_pred_lr = lin_reg.predict(X_test)\n",
    "\n",
    "# The printed Accuracy value is the R^2 score on the test set\n",
    "accuracy_lf = metrics.r2_score(y_test, y_pred_lr)\n",
    "print('Mutiple Linear Regression Accuracy: ', accuracy_lf)\n",
    "\n",
    "# Out-of-fold predictions for every row of X from 10-fold cross validation\n",
    "y_pred_kf_lr = cross_val_predict(lin_reg, X, y, cv=10)\n",
    "\n",
    "# R^2 over the pooled out-of-fold predictions (not the mean of the per-fold\n",
    "# scores that cross_val_score would report)\n",
    "accuracy_lf = metrics.r2_score(y, y_pred_kf_lr)\n",
    "print('Cross-Predicted(KFold) Mutiple Linear Regression Accuracy: ', accuracy_lf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 241,
   "id": "1a05e96e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cross-Predicted(KFold) Polynominal Regression Accuracy: 0.2973208431334121\n"
     ]
    }
   ],
   "source": [
    "poly_reg = PolynomialFeatures(degree = 2)\n",
    "X_poly = poly_reg.fit_transform(X)\n",
    "lin_reg_pl = LinearRegression()\n",
    "\n",
    "# Out-of-fold predictions from 10-fold cross validation on the degree-2 features\n",
    "y_pred_pl = cross_val_predict(lin_reg_pl, X_poly, y, cv=10)\n",
    "# R^2 over the pooled out-of-fold predictions\n",
    "accuracy_pl = metrics.r2_score(y, y_pred_pl)\n",
    "print('Cross-Predicted(KFold) Polynominal Regression Accuracy: ', accuracy_pl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "id": "3002f71b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Decision Tree Regression Accuracy: 0.41316821537538995\n",
      "Cross-Predicted(KFold) Decision Tree Regression Accuracy: 0.43545963056661785\n"
     ]
    }
   ],
   "source": [
    "dt_regressor = DecisionTreeRegressor(random_state = 0)\n",
    "dt_regressor.fit(X_train,y_train)\n",
    "\n",
    "# Predict the target for the test rows\n",
    "y_pred_dt = dt_regressor.predict(X_test)\n",
    "\n",
    "# R^2 on the test set, computed from the predictions above instead of\n",
    "# calling dt_regressor.score(), which would run predict() a second time\n",
    "print('Decision Tree Regression Accuracy: ', metrics.r2_score(y_test, y_pred_dt))\n",
    "\n",
    "# Out-of-fold predictions from 10-fold cross validation\n",
    "y_pred_dt = 
cross_val_predict(dt_regressor, X, y, cv=10 )\n",
    "# R^2 over the pooled out-of-fold predictions\n",
    "accuracy_dt = metrics.r2_score(y, y_pred_dt)\n",
    "print('Cross-Predicted(KFold) Decision Tree Regression Accuracy: ', accuracy_dt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 243,
   "id": "45e08026",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest Regression Accuracy: 0.5152096358767789\n",
      "Cross-Predicted(KFold) Random Forest Regression Accuracy: 0.5008508177441224\n"
     ]
    }
   ],
   "source": [
    "rf_regressor = RandomForestRegressor(n_estimators = 300 , random_state = 0)\n",
    "rf_regressor.fit(X_train,y_train)\n",
    "\n",
    "# Predict the target for the test rows\n",
    "y_pred_rf = rf_regressor.predict(X_test)\n",
    "\n",
    "# R^2 on the test set, reusing y_pred_rf rather than rf_regressor.score(),\n",
    "# which would push X_test through all 300 trees a second time\n",
    "print('Random Forest Regression Accuracy: ', metrics.r2_score(y_test, y_pred_rf))\n",
    "\n",
    "# Out-of-fold predictions from 10-fold cross validation\n",
    "y_pred_rf = cross_val_predict(rf_regressor, X, y, cv=10)\n",
    "\n",
    "# R^2 over the pooled out-of-fold predictions\n",
    "accuracy_rf = metrics.r2_score(y, y_pred_rf)\n",
    "print('Cross-Predicted(KFold) Random Forest Regression Accuracy: ', accuracy_rf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 245,
   "id": "0de49b8a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "",
      "text/plain": [
       ""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Column order that sorts the forest's feature importances from largest to smallest\n",
    "ranking = np.argsort(-rf_regressor.feature_importances_)\n",
    "f, ax = plt.subplots(figsize=(15, 10))\n",
    "# Draw on the axes created above explicitly instead of relying on pyplot's current axes\n",
    "sns.barplot(x=rf_regressor.feature_importances_[ranking], y=X_train.columns.values[ranking], orient='h', ax=ax)\n",
    "ax.set_xlabel(\"feature importance\")\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "470425b6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.13 ('leagues')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "a07b7f3079ca8c056705d3c757c4f3f92f9509f33eeab9ad5420dacec37bc01a"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}