research/dfbnet/kmeans_sachsen.py
2024-11-30 09:57:00 +01:00

666 lines
20 KiB
Python

# %%
from pulp import (
LpVariable,
LpProblem,
LpMinimize,
lpSum,
LpStatus,
value,
LpInteger,
LpContinuous,
XPRESS,
)
import googlemaps
from gmplot import GoogleMapPlotter
import json
import pandas as pd
import ast
import random
import itertools
import time
import os
from scipy.cluster.vq import kmeans, vq
import numpy as np
import matplotlib.pyplot as plt
# Export the location of the local Xpress installation into the process
# environment (presumably so that PuLP's XPRESS solver can find it - confirm).
os.environ["XPRESSDIR"] = "/opt/xpressmp_9.5.0"
os.environ["XPRESS"] = "/opt/xpressmp_9.5.0/bin"
# All library search-path variables receive the same <XPRESSDIR>/lib value.
for _lib_var in (
    "LD_LIBRARY_PATH",
    "DYLD_LIBRARY_PATH",
    "SHLIB_PATH",
    "LIBPATH",
    "PYTHONPATH",
):
    os.environ[_lib_var] = os.environ["XPRESSDIR"] + "/lib"
# Java class path, in the order xprm.jar, xprb.jar, xprs.jar.
os.environ["CLASSPATH"] = os.pathsep.join(
    os.environ["XPRESSDIR"] + "/lib/" + _jar
    for _jar in ("xprm.jar", "xprb.jar", "xprs.jar")
)
# The Xpress binaries are put in front of the existing PATH.
os.environ["PATH"] = os.pathsep.join(
    (os.environ["XPRESSDIR"] + "/bin", os.environ["PATH"])
)
# %%
from math import sqrt, sin, cos, atan2, pi
def degreesToRadians(degrees):
    """Return the angle ``degrees`` expressed in radians."""
    scaled = degrees * pi
    return scaled / 180
def distanceInKmByGPS(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two coordinates in whole km.

    The coordinates are given in degrees. The haversine formula is evaluated
    on a sphere of radius 6371 km and the result is truncated to an ``int``.
    """
    earth_radius_km = 6371
    delta_lat = degreesToRadians(lat2 - lat1)
    delta_lon = degreesToRadians(lon2 - lon1)
    phi1 = degreesToRadians(lat1)
    phi2 = degreesToRadians(lat2)
    sin_half_dlat = sin(delta_lat / 2)
    sin_half_dlon = sin(delta_lon / 2)
    a = sin_half_dlat * sin_half_dlat + sin_half_dlon * sin_half_dlon * cos(phi1) * cos(phi2)
    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))
    return int(earth_radius_km * central_angle)
def random_color():
    """Return a random colour as a ``#rrggbb`` hex string."""
    rgb = random.randint(0, 0xFFFFFF)
    return f"#{rgb:06x}"
# %%
# Load the divisions from JSON. The JSON object keys are Python literals
# stored as strings and are parsed back with ast.literal_eval.
with open("data/sachsen.json", "r", encoding="utf-8") as f:
    competitions = {
        ast.literal_eval(raw_key): entry for raw_key, entry in json.load(f).items()
    }
# %%
# region
# NUMBER OF DIVISIONS (STAFFELN) PER TEAM TYPE AND LEAGUE CLASS
# ('Herren', 'Landesliga') 1
# ('Herren', 'Landesklasse') 3
# ('Frauen', 'Landesliga') 1
# ('Frauen', 'Landesklasse') 3
# ('A-Junioren', 'Landesliga') 1
# ('A-Junioren', 'Landesklasse') 4
# ('Herren', 'Kreisoberliga') 13
# ('Herren', '1.Kreisliga (A)') 19
# ('Herren', '2.Kreisliga (B)') 8
# ('Herren', '3.Kreisliga (C)') 1
# ('Herren', '1.Kreisklasse') 21
# ('Herren', '2.Kreisklasse') 9
# ('A-Junioren', 'Kreisoberliga') 10
# ('A-Junioren', '1.Kreisliga (A)') 6
# ('Frauen', 'Kreisoberliga') 4
# ('Frauen', '1.Kreisliga (A)') 1
# ('Frauen', '1.Kreisklasse') 3
# ('B-Junioren', 'Landesliga') 1
# ('B-Junioren', 'Landesklasse') 4
# ('B-Junioren', 'Kreisoberliga') 13
# ('B-Junioren', '1.Kreisliga (A)') 13
# ('B-Junioren', '1.Kreisklasse') 1
# ('C-Junioren', 'Landesliga') 1
# ('C-Junioren', 'Landesklasse') 4
# ('C-Junioren', 'Kreisoberliga') 16
# ('C-Junioren', '1.Kreisliga (A)') 15
# ('C-Junioren', '1.Kreisklasse') 9
# ('D-Junioren', 'Landesliga') 1
# ('D-Junioren', 'Landesklasse') 6
# ('D-Junioren', 'Kreisoberliga') 16
# ('D-Junioren', '1.Kreisliga (A)') 24
# ('D-Junioren', '2.Kreisliga (B)') 8
# ('D-Junioren', '3.Kreisliga (C)') 2
# ('D-Junioren', '1.Kreisklasse') 33
# ('D-Junioren', '2.Kreisklasse') 10
# ('B-Juniorinnen', 'Landesliga') 1
# ('B-Juniorinnen', 'Landesklasse') 2
# ('C-Juniorinnen', 'Landesklasse') 3
# ('D-Juniorinnen', 'Kreisoberliga') 1
# ('Herren Ü35', 'Kreisoberliga') 4
# ('Herren Ü35', '1.Kreisliga (A)') 3
# ('Herren Ü35', '1.Kreisklasse') 3
# ('Herren Ü35', '2.Kreisklasse') 1
# ('Herren Ü40', '1.Kreisliga (A)') 5
# ('Herren Ü40', '1.Kreisklasse') 1
# ('Herren Ü50', '1.Kreisliga (A)') 1
# ('Herren Ü50', '1.Kreisklasse') 1
# ('Freizeitsport', '1.Kreisliga (A)') 3
# ('Freizeitsport', '1.Kreisklasse') 2
# endregion
# Aggregate the individual divisions (Staffeln) into competitions. A
# competition is identified by the first two components of the division key.
# For every competition we collect the number of divisions, the size of each
# division, the total number of teams and the combined team list.
competition_details = {}
color = None
for staffel, attr in competitions.items():
    # if (staffel[0], staffel[1]) != ('Herren', 'Kreisoberliga'):
    #     continue
    competitions[staffel]["distance"] = []
    competition_key = (staffel[0], staffel[1])
    staffel_teams = attr["teams"]
    if competition_key not in competition_details:
        competition_details[competition_key] = {
            "nStaffeln": 1,
            "nTeams": len(staffel_teams),
            # Copy the list: it is extended below for further divisions, and
            # sharing the object would also grow the first division's own
            # team list inside ``competitions``.
            "teams": list(staffel_teams),
            "group_sizes": [len(staffel_teams)],
            "clusters": {},
        }
    else:
        aggregate = competition_details[competition_key]
        aggregate["nStaffeln"] += 1
        aggregate["group_sizes"].append(len(staffel_teams))
        aggregate["nTeams"] += len(staffel_teams)
        aggregate["teams"].extend(staffel_teams)
"""" GENERATE ALL DISTANCES BETWEEN TEAMS """
# Colour names passed to gmplot, one per division of a competition. The list
# is built once and indexed per division (cycling if it is ever exhausted)
# instead of being rebuilt and consumed with pop(0) for every competition.
some_colors = [
    "red", "blue", "green", "yellow", "purple", "orange", "pink", "brown",
    "black", "white", "gray", "cyan", "magenta", "lime", "indigo", "violet",
    "turquoise", "gold", "silver", "beige", "maroon", "olive", "navy", "teal",
    "coral", "lavender", "salmon", "chocolate", "crimson", "aqua", "ivory",
    "khaki", "plum", "orchid", "peru", "tan", "tomato", "wheat", "azure",
    "mint", "apricot", "chartreuse", "amber", "fuchsia", "jade", "ruby",
    "amethyst", "rose", "sapphire", "cerulean", "moss", "denim", "copper",
    "peach", "sand", "pearl", "mulberry", "lemon", "cream", "ocher", "brass",
    "eggplant", "cinnamon", "mustard", "rust", "sienna", "sepia", "umber",
    "limegreen", "seagreen", "forestgreen", "dodgerblue", "mediumslateblue",
    "royalblue", "firebrick", "darkolivegreen", "midnightblue",
    "darkturquoise", "lightcoral", "palevioletred", "hotpink", "deeppink",
    "darkkhaki", "lightseagreen", "darkslategray", "slategray",
    "lightsteelblue", "skyblue", "lightblue", "powderblue", "darkorange",
    "lightsalmon", "indianred", "thistle", "burlywood", "mediumaquamarine",
    "mediumorchid", "mediumvioletred", "papayawhip", "moccasin", "bisque",
    "blanchedalmond", "antiquewhite", "mistyrose", "lavenderblush", "linen",
    "snow", "honeydew", "palegreen", "lightcyan", "aliceblue", "ghostwhite",
    "whitesmoke", "gainsboro",
]
# Centre of every generated map.
latitude = 51.18292980165227
longitude = 13.11435805600463

# distance_between_teams[a][b] is the distance in km between teams a and b,
# both identified by their "MANNSCHAFT" value.
distance_between_teams = {}
for competition, details in competition_details.items():
    # competition = ('Herren', 'Kreisoberliga')
    # details = competition_details[competition]
    print(f"Calculating distances for {competition}")
    teams = details["teams"]

    # --- pairwise distances; each team also gets a per-competition "ID" ---
    for team_index, team1 in enumerate(teams):
        team1["ID"] = team_index
        distances_from_team1 = {}
        for team2 in teams:
            distance = 0
            if team1["MANNSCHAFT"] != team2["MANNSCHAFT"]:
                distance = distanceInKmByGPS(
                    team1["LATITUDE"],
                    team1["LONGITUDE"],
                    team2["LATITUDE"],
                    team2["LONGITUDE"],
                )
            distances_from_team1[team2["MANNSCHAFT"]] = distance
        distance_between_teams[team1["MANNSCHAFT"]] = distances_from_team1

    # --- initial centroids from k-means on the raw coordinates ---
    data = np.array([[team["LATITUDE"], team["LONGITUDE"]] for team in teams])
    k = details["nStaffeln"]
    centroids, _ = kmeans(data, k)
    cluster_labels, _ = vq(data, centroids)  # not used further in this section
    # If fewer than k centroids came back, pad with [0, 0] so that every
    # group index has a centroid.
    for _missing in range(len(centroids), k):
        centroids = np.append(centroids, [[0, 0]], axis=0)

    # --- re-cluster into divisions of the prescribed sizes ---
    # Alternate between (1) a MIP that assigns teams to groups of fixed size
    # minimising the distance to the current centroids and (2) recomputing
    # the centroids, until the objective stops decreasing.
    groups = range(1, k + 1)
    improvement = True
    it = 0
    # None means "no previous solve yet". A falsy sentinel must not be used:
    # an objective of 0 would then never count as a previous value and the
    # loop would not terminate.
    last_objective = None
    while improvement:
        it += 1
        print("Iteration", it)
        model = LpProblem(f"KMeans_{it}", LpMinimize)

        # g[(team, group)] = 1 if the team is assigned to the group.
        g = {}
        for team in teams:
            for group in groups:
                g[(team["MANNSCHAFT"], group)] = LpVariable(
                    f"team_{team['ID']}_{group}",
                    lowBound=0,
                    upBound=1,
                    cat=LpInteger,
                )

        # Each team is in exactly one group.
        for team in teams:
            model += lpSum(g[(team["MANNSCHAFT"], group)] for group in groups) == 1
        # Each group keeps the size of the corresponding original division.
        for group, group_size in enumerate(details["group_sizes"], start=1):
            model += lpSum(g[(team["MANNSCHAFT"], group)] for team in teams) == group_size

        # Objective: total distance between the teams and their group centroid.
        model += lpSum(
            g[team["MANNSCHAFT"], group]
            * distanceInKmByGPS(
                team["LATITUDE"],
                team["LONGITUDE"],
                centroids[group - 1][0],
                centroids[group - 1][1],
            )
            for team in teams
            for group in groups
        )

        # model.writeLP(f"kmeans/kmeans_{competition}_{it}.lp")
        model.solve(XPRESS(msg=0, gapRel=0.01))

        objective = value(model.objective)
        if last_objective is not None and last_objective <= objective:
            improvement = False
        last_objective = objective

        # Recompute each centroid as the mean position of its assigned teams.
        centroids = []
        for group in groups:
            members = [
                team for team in teams if value(g[(team["MANNSCHAFT"], group)]) > 0.9
            ]
            centroids.append(
                [
                    np.mean([team["LATITUDE"] for team in members]),
                    np.mean([team["LONGITUDE"] for team in members]),
                ]
            )

    # --- final assignment taken from the last solve ---
    clusters = {
        group: [team for team in teams if value(g[(team["MANNSCHAFT"], group)]) > 0.9]
        for group in groups
    }
    competition_details[competition]["clusters"] = clusters

    # --- draw one map per competition ---
    # NOTE(review): the fallback key is committed to the repository. Prefer
    # setting GOOGLE_MAPS_API_KEY; the embedded key should be rotated and
    # removed from the source.
    gmap = GoogleMapPlotter(
        latitude,
        longitude,
        8,
        apikey=os.environ.get(
            "GOOGLE_MAPS_API_KEY", "AIzaSyAPzFyMk3ZA0kL9TUlJ_kpV_IY56uBwdrc"
        ),
    )
    aggregated_distance = 0
    distance_for_team = {}
    for cluster_index, (cluster, teamslist) in enumerate(clusters.items()):
        color = some_colors[cluster_index % len(some_colors)]
        latitudes = []
        longitudes = []
        markers_text = []
        cluster_distance = 0
        for team1 in teamslist:
            distance_for_team[team1["MANNSCHAFT"]] = []
            for team2 in teamslist:
                distance = 0
                if team1["MANNSCHAFT"] != team2["MANNSCHAFT"]:
                    distance = distance_between_teams[team1["MANNSCHAFT"]][team2["MANNSCHAFT"]]
                cluster_distance += distance
                aggregated_distance += distance
                distance_for_team[team1["MANNSCHAFT"]].append(distance)
            latitudes.append(team1["LATITUDE"])
            longitudes.append(team1["LONGITUDE"])
            markers_text.append(f"{team1['MANNSCHAFT']} @{team1['SPIELSTAETTE']}")

        # Plot the points, a line between every pair of teams of the division,
        # and one titled marker per team.
        gmap.scatter(latitudes, longitudes, color=color, size=40, marker=False)
        for (lat1, lon1), (lat2, lon2) in itertools.combinations(
            zip(latitudes, longitudes), 2
        ):
            gmap.plot([lat1, lat2], [lon1, lon2], color=color, edge_width=2)
        for lat, lon, text in zip(latitudes, longitudes, markers_text):
            gmap.marker(lat, lon, title=text.replace('"', ""), color=color)
        print(cluster, len(teamslist), cluster_distance, aggregated_distance, color)
    gmap.draw(f"kmeans/map_mip_{competition}.html")
# %%
""" DUMP THE COMPETITIONS """
from datetime import datetime, time, date
# from competitions import get_teams_from_staffel
from schluesselzahlen import get_schluesselzahlen
from rahmentermine import get_rahmentermine
from spielstaetten import get_venues
# staffel = "Brandible Stadtliga B"
# teams = get_teams_from_staffel(staffel)
def datetime_serializer(obj):
    """Return the ISO-8601 string for a ``datetime``, ``date`` or ``time``.

    Raises:
        TypeError: if ``obj`` is of any other type.
    """
    if not isinstance(obj, (datetime, date, time)):
        raise TypeError("Type not serializable")
    return obj.isoformat()
# Build the export structures: one entry in ``divisions`` per cluster that has
# frame dates (Rahmentermine), and one entry in ``venues`` per distinct
# playing venue name ("SPIELSTAETTE").
staffeln = []
divisions = []
courts = []
court_names = []
venues = []
for competition, details in competition_details.items():
    for cluster, cluster_teams in details['clusters'].items():
        print(f"Processing {competition} {cluster}")
        # An odd number of teams is rounded up to the next even number.
        team_count = len(cluster_teams)
        nTeams = team_count + team_count % 2
        pattern, opponent = get_schluesselzahlen(nTeams)
        teams = cluster_teams

        # Translate the team type of the first team into the category name
        # that is passed to get_rahmentermine.
        ms_art = teams[0]['MS_ART']
        if ms_art in ("A-Junioren", "B-Junioren", "C-Junioren", "D-Junioren", "D-Juniorinnen"):
            ms_art = "Junioren A-D"
        elif ms_art == "Herren Ü50":
            ms_art = "Senioren Ü50"
        elif ms_art == "Herren Ü35":
            ms_art = "Senioren Ü35"

        rahmentermine = get_rahmentermine(ms_art, nTeams)
        if not rahmentermine:
            # Clusters without frame dates are skipped (and so are their venues).
            print("No rahmentermine for", competition, cluster, ms_art, nTeams)
            continue

        division_entry = {
            "name": f"{competition[0]} {competition[1]} {cluster}",
            "teams": teams,
            "nTeams": nTeams,
            "ms_art": ms_art,
            "pattern": pattern,
            "opponent": opponent,
            "rahmentermine": rahmentermine
        }
        divisions.append(division_entry)

        for t in teams:
            venue_name = t['SPIELSTAETTE']
            if venue_name in court_names:
                continue
            # courts += [{"name":t['SPIELSTAETTE']}]
            venue_number = len(venues) + 1
            venues.append({
                "SB_SPST_ID": venue_number,
                "SB_SPST_GEBIET_REF": venue_number,
                "SB_SPST_NAME": venue_name,
                "SB_SPST_TYP_REF": 1,
                "SB_SPST_ZUSTAND_REF": 1,
                "SB_SPST_PLATZ_NR": "(null)",
                "SB_SPST_FLUTLICHT": "t",
                "SB_SPST_SPIELE_PARALLEL_MAX": 3,
                "SB_SPST_ANSTOSSZEIT_VON": "08:30:00",
                "SB_SPST_ANSTOSSZEIT_BIS": "20:30:00",
                "SB_SPST_ANZ_UMKLEIDEN": 5,
                "SB_SPST_MITTAGSPAUSE_VON": "(null)",
                "SB_SPST_MITTAGSPAUSE_BIS": "(null)",
                "SB_SPST_GROESSE_REF": 1,
                "SB_SPST_SPIELE_TAG_MAX": 20,
                "SB_SPST_ANZ_TORE": 6,
                "SB_SPST_SPIELE_ABSTAND": 0,
                "latitude": t["LATITUDE"],
                "longitude": t["LONGITUDE"],
            })
            court_names.append(venue_name)
""" dump json """
# Write the divisions and venues to disk. Date/time values are converted by
# datetime_serializer.
import json
export_payload = {'divisions': divisions, 'venues': venues}
with open("kmeans/competitions.json", "w", encoding="utf-8") as f:
    json.dump(
        export_payload,
        f,
        default=datetime_serializer,
        ensure_ascii=False,
        indent=4,
    )
# # %%
# %%
# Read the exported divisions and venues back and build one PuLP model
# ("Spielplan") that assigns a distinct pattern to every team of a division.
with open("kmeans/competitions.json", "r", encoding="utf-8") as f:
    data = json.load(f)
from pulp import LpVariable, LpProblem, LpMinimize, lpSum, LpStatus, value, LpInteger, XPRESS


def _binary_variable(name):
    """Create an integer PuLP variable with bounds 0 and 1."""
    return LpVariable(name, lowBound=0, upBound=1, cat=LpInteger)


model = LpProblem("Spielplan", LpMinimize)
x, home, assignPattern = {}, {}, {}
divisions, venues = data['divisions'], data['venues']
max_rounds = 0
team_id = 0
for division_id, division in enumerate(divisions):
    division['id'] = division_id
    rahmentermine = division['rahmentermine']
    teams = division['teams']
    pattern = division['pattern']
    opponent = division['opponent']
    nTeams = division['nTeams']
    ms_art = division['ms_art']

    # Team ids are numbered consecutively across all divisions.
    for t in teams:
        t['id'] = team_id
        team_id += 1

    # Rounds are numbered from 1; the frame dates are split into a first and
    # a second half.
    half = len(rahmentermine) // 2
    rounds1 = list(range(1, half + 1))
    rounds2 = list(range(half + 1, len(rahmentermine) + 1))
    rounds = rounds1 + rounds2
    max_rounds = max(max_rounds, len(rounds1))

    # One variable per team and round in ``x`` and in ``home``.
    for team in teams:
        tid = team['id']
        for rnd in rounds:
            x[(tid, rnd)] = _binary_variable(f"team_{tid}_{rnd}")
    for team in teams:
        tid = team['id']
        for rnd in rounds:
            home[(tid, rnd)] = _binary_variable(f"home_{tid}_{rnd}")
    # One variable per team and pattern.
    for team in teams:
        tid = team['id']
        for p in pattern:
            assignPattern[(tid, p)] = _binary_variable(f"pattern_{tid}_{p}")

    # Each team gets exactly one pattern.
    for team in teams:
        tid = team['id']
        model += (
            lpSum(assignPattern[(tid, p)] for p in pattern) == 1,
            f"team_{tid}_one_pattern",
        )
        # if team['SPIELSTAETTE'].strip() not in [venue['SB_SPST_NAME'] for venue in venues]:
        #     print(f"Venue {team['SPIELSTAETTE']} not found in venues")
        #     exit()
        # else:
        #     print(f"Venue {team['SPIELSTAETTE']} found in venues")

    # A pattern is used by at most one team of the division.
    for p in pattern:
        model += (
            lpSum(assignPattern[(team['id'], p)] for team in teams) <= 1,
            f"pattern_{p}_used_once_in_division_{division['id']}",
        )

    # Couple patterns and home variables for the first half: a team is at home
    # in a round exactly when its pattern has "H" at that round's position.
    for rnd in rounds1:
        home_patterns = [p for p in pattern if pattern[p][rnd - 1] == "H"]
        for team in teams:
            tid = team['id']
            model += (
                lpSum(assignPattern[(tid, p)] for p in home_patterns) == home[(tid, rnd)],
                f"coupling_pattern_home_{tid}_{rnd}",
            )

model.solve(XPRESS(msg=1))
# Export the home games of the solved schedule to kmeans/schedule.csv: one row
# per round, venue and home team, together with the opponent that the home
# team's pattern prescribes for that round.
import csv


def _is_set(variable):
    """Return True if a solved 0/1 variable (or plain number) is set.

    Solver results are floating-point numbers, so the value is compared
    against 0.5 instead of testing for equality with 1. Unsolved variables
    (value ``None``) count as not set.
    """
    solved = value(variable)
    return solved is not None and solved > 0.5


def _assigned_pattern(team, division):
    """Return the pattern key assigned to ``team``, or None if there is none."""
    for p in division['pattern']:
        if _is_set(assignPattern[(team['id'], p)]):
            return p
    return None


# Look up every team's pattern once instead of once per candidate opponent.
pattern_of_team = {
    team['id']: _assigned_pattern(team, division)
    for division in divisions
    for team in division['teams']
}

# The context manager closes the file even if a row fails; csv.writer quotes
# fields that contain separators.
with open("kmeans/schedule.csv", "w", encoding="utf-8", newline="") as csv_file:
    writer = csv.writer(csv_file, lineterminator="\n")
    writer.writerow([
        "round", "venue", "day", "division", "hometeam", "awayteam",
        "homepattern", "awaypattern", "wunschtag", "wunschzeit",
    ])
    for round_no in range(1, max_rounds + 1):
        for venue in venues:
            print(f"Round {round_no} at {venue['SB_SPST_NAME']}")
            for division in divisions:
                # Divisions with nTeams <= round number are skipped for this round.
                if division['nTeams'] <= round_no:
                    continue
                for team in division['teams']:
                    if team['SPIELSTAETTE'] != venue['SB_SPST_NAME']:
                        continue
                    if not _is_set(home.get((team['id'], round_no), 0)):
                        continue
                    p1 = pattern_of_team[team['id']]
                    if p1 is None:
                        continue
                    # Pattern number of the opponent in this round.
                    wanted = int(division['opponent'][p1][round_no - 1])
                    o = None
                    p2 = None
                    for t2 in division['teams']:
                        candidate = pattern_of_team[t2['id']]
                        if candidate is not None and int(candidate) == wanted:
                            o = t2['MANNSCHAFT']
                            p2 = candidate
                            break
                    # print(f"{round_no} ({team['SPIELSTAETTE']}, {venue['SB_SPST_SPIELE_TAG_MAX']}): {division['name']} - {team['MANNSCHAFT']} - {p1} - {team['WUNSCH_TAG']} - {team['WUNSCH_ZEIT']} vs {o} - {p2}")
                    if o:
                        row = [
                            round_no,
                            team['SPIELSTAETTE'].replace(",", " "),
                            venue['SB_SPST_SPIELE_TAG_MAX'],
                            division['name'],
                            team['MANNSCHAFT'],
                            o,
                            p1,
                            p2,
                            team['WUNSCH_TAG'],
                            team['WUNSCH_ZEIT'],
                        ]
                        # str() keeps the textual form an f-string would
                        # produce (e.g. "None" instead of an empty field).
                        writer.writerow([str(field) for field in row])