{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4d2a8b6c",
   "metadata": {},
   "source": [
    "#### Database\n",
    "\n",
    "Bootstraps Django against the local project checkout and loads the games of the benchmark season, annotated with team names, coordinates and attractivity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7be9eeff",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, sys\n",
    "\n",
    "# Location of the Django project checkout. Set the LEAGUES_PROJECT_PATH\n",
    "# environment variable to run the notebook on another machine without editing it.\n",
    "PROJECT_PATH = os.environ.get('LEAGUES_PROJECT_PATH', '/home/md/Work/ligalytics/leagues_stable/')\n",
    "sys.path.insert(0, PROJECT_PATH)\n",
    "os.environ.setdefault(\"DJANGO_SETTINGS_MODULE\", \"leagues.settings\")\n",
    "\n",
    "from leagues import settings\n",
    "# os.path.join avoids the doubled slash that plain string concatenation produced.\n",
    "settings.DATABASES['default']['NAME'] = os.path.join(PROJECT_PATH, 'db.sqlite3')\n",
    "\n",
    "import django\n",
    "django.setup()\n",
    "\n",
    "# The ORM models can only be imported after django.setup().\n",
    "from scheduler.models import *\n",
    "from common.functions import distanceInKmByGPS\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from django.db.models import Count, F, Value\n",
    "\n",
    "season = Season.objects.filter(nicename=\"Imported: Benchmark Season\").first()\n",
    "if season is None:\n",
    "    # .first() returns None when nothing matches; filtering on season=None\n",
    "    # below would then silently select the wrong games.\n",
    "    raise RuntimeError(\"Season 'Imported: Benchmark Season' not found in the database\")\n",
    "\n",
    "# One dict per game: all Game columns plus the annotated team attributes.\n",
    "games = Game.objects.filter(season=season).annotate(\n",
    "    home=F('homeTeam__shortname'),\n",
    "    away=F('awayTeam__shortname'),\n",
    "    home_lat=F('homeTeam__latitude'),\n",
    "    home_lon=F('homeTeam__longitude'),\n",
    "    home_attr=F('homeTeam__attractivity'),\n",
    "    away_lat=F('awayTeam__latitude'),\n",
    "    away_lon=F('awayTeam__longitude'),\n",
    "    away_attr=F('awayTeam__attractivity')\n",
    ").values()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc191792",
   "metadata": {},
   "source": [
    "#### Dataframe\n",
    "\n",
    "Builds the modelling table: calendar features, travel distance, per-home-team attendance statistics and one-hot encoded categorical fields. The label `attendance` is moved to the last column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1e404cf8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n",
    "# create dataset (`games` is already a .values() queryset, i.e. one dict per game)\n",
    "df = pd.DataFrame.from_records(games)\n",
    "\n",
    "# pivots: mean and max attendance per home team\n",
    "# NOTE(review): both are computed on ALL games, including those that end up in\n",
    "# the test split further down, so `home_base` and `stadium_size` leak label\n",
    "# information into the evaluation. Compute them on the training rows only if\n",
    "# the test score is meant to be an honest estimate.\n",
    "pivot_homeTeam_mean = df.pivot_table('attendance','homeTeam_id',aggfunc='mean')\n",
    "pivot_homeTeam_max = df.pivot_table('attendance','homeTeam_id',aggfunc='max')\n",
    "\n",
    "# add more features\n",
    "df['weekday'] = df['date'].map(lambda d: d.weekday())\n",
    "df['day'] = df['date'].map(lambda d: d.day)\n",
    "df['month'] = df['date'].map(lambda d: d.month)\n",
    "df['year'] = df['date'].map(lambda d: d.year)\n",
    "df['distance'] = df.apply(lambda r: distanceInKmByGPS(r['home_lon'],r['home_lat'],r['away_lon'],r['away_lat']), axis=1)\n",
    "# weekday() counts from 0 (Monday) to 6 (Sunday): the weekend is 5 and 6.\n",
    "df['weekend'] = df['weekday'].isin([5, 6]).astype(int)\n",
    "df['winter_season'] = df['month'].isin([1, 2, 3, 10, 11, 12]).astype(int)\n",
    "# mean attendance of the home team\n",
    "df['home_base'] = df['homeTeam_id'].map(pivot_homeTeam_mean['attendance'])\n",
    "# highest attendance seen for the home team, used as a stand-in for capacity\n",
    "df['stadium_size'] = df['homeTeam_id'].map(pivot_homeTeam_max['attendance'])\n",
    "\n",
    "# one hot encoding\n",
    "ohe_fields = ['time', 'historic_season']\n",
    "\n",
    "for field in ohe_fields:\n",
    "    ohe = OneHotEncoder()\n",
    "    transformed = ohe.fit_transform(df[[field]])\n",
    "    # Prefix every category with its field name: the new column names are then\n",
    "    # always strings and categories of different fields cannot overwrite each other.\n",
    "    ohe_cols = [f'{field}_{category}' for category in ohe.categories_[0]]\n",
    "    encoded = pd.DataFrame(transformed.toarray(), columns=ohe_cols, index=df.index)\n",
    "    df = pd.concat([df, encoded], axis=1)\n",
    "\n",
    "# sort label to last index\n",
    "cols = [c for c in df.columns if c != 'attendance'] + ['attendance']\n",
    "df = df[cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e69d24dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importing libraries used for modelling\n",
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing\n",
    "import matplotlib.pyplot as plt # plotting library\n",
    "from sklearn.model_selection import train_test_split,cross_val_score, cross_val_predict\n",
    "from sklearn import metrics\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.preprocessing import PolynomialFeatures\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.ensemble import RandomForestRegressor"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2ea08e5",
   "metadata": {},
   "source": [
    "#### Train/Test Split\n",
    "\n",
    "Selects the feature columns and splits the games 70/30 into training and test data. No scaling or normalization is applied."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "74e12f87",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "\n",
    "# columns that must not be used as features\n",
    "remove_columns = ['season_id', 'resultEntered', 'reversible', 'reschedule', 'homeGoals', 'awayGoals',\n",
    "    'homeGoals2', 'awayGoals2', 'homeGoals3', 'awayGoals3', 'home', 'away', 'date', 'time', 'historic_season', 'id', 'homeTeam_id', 'awayTeam_id']\n",
    "label = 'attendance'\n",
    "\n",
    "# Keep the DataFrame's own column order. Going through a set made the feature\n",
    "# order depend on string hashing, which differs between kernel sessions and can\n",
    "# change the fitted tree even though random_state is fixed.\n",
    "excluded = set(remove_columns) | {label}\n",
    "feature_cols = [c for c in df.columns if c not in excluded]\n",
    "# feature_cols = ['weekday','weekend','home_base','distance','winter_season']\n",
    "\n",
    "\n",
    "X = df[feature_cols] # Features\n",
    "y = df[label] # Target variable\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=1) # 70% training and 30% test"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94ade4b4",
   "metadata": {},
   "source": [
    "#### Decision Tree\n",
    "\n",
    "Fits a depth-limited regression tree on the training data and writes a rendering of it to `attendance.png`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4c9bdd0d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FITTING...done\n",
      "VISUALIZE\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pydotplus\n",
    "from io import StringIO  # stdlib; six.StringIO is the same class on Python 3\n",
    "from sklearn.tree import export_graphviz\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n",
    "# Create the decision tree regressor (attendance is a numeric target)\n",
    "regr = DecisionTreeRegressor(max_depth=5, random_state=1234)\n",
    "\n",
    "# Train the decision tree regressor\n",
    "print(\"FITTING...\", end=\"\")\n",
    "regr = regr.fit(X_train, y_train)\n",
    "print(\"done\")\n",
    "\n",
    "# Predict the response for the test dataset\n",
    "y_pred = regr.predict(X_test)\n",
    "\n",
    "print(\"VISUALIZE\")\n",
    "# Export the fitted tree as DOT source and render it to a PNG file.\n",
    "dot_data = StringIO()\n",
    "export_graphviz(regr, out_file=dot_data,\n",
    "                filled=True, rounded=True,\n",
    "                special_characters=True, feature_names=feature_cols)\n",
    "graph = pydotplus.graph_from_dot_data(dot_data.getvalue())\n",
    "graph.write_png('attendance.png')\n",
    "# Image(graph.create_png())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.13 ('leagues')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "a07b7f3079ca8c056705d3c757c4f3f92f9509f33eeab9ad5420dacec37bc01a"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}