research/dfbnet/competitions_sachsen.py
2024-11-30 09:57:00 +01:00

650 lines
16 KiB
Python

# %%
import googlemaps
from gmplot import GoogleMapPlotter
import json
import pandas as pd
import ast
import random
import itertools
# %%
def convert_xlsx_to_json():
gmaps = googlemaps.Client(key="AIzaSyB76EhR4OqjdXHQUiTkHZC0Svx_7cPGqyU")
staffel = None
dresden = pd.read_excel("data/beispiel_daten_sachsen.xlsx")
# staffeln = dresden["STAFFEL"].unique()
unique_staffeln = list(
dresden[["MS_ART", "SP_KLASSE", "STAFFEL"]]
.drop_duplicates()
.itertuples(index=False, name=None)
)
competitions_dict = {s: {} for s in unique_staffeln}
teams_in_competition = {}
staffel_type = None
for art, klasse, staffel in unique_staffeln:
find_duplicates = []
teams_in_competition[(art, klasse, staffel)] = []
teams = dresden[
(dresden["MS_ART"] == art)
& (dresden["SP_KLASSE"] == klasse)
& (dresden["STAFFEL"] == staffel)
][
[
"GEB_VEREIN",
"GEB_MS",
"MANNSCHAFT",
"MS_KEY",
"MS_ART",
"SP_KLASSE",
"STAFFEL",
"SCHLUESSEL_ZAHL",
"WUNSCH_WOCHENTAG",
"WUNSCH_TAG",
"WUNSCH_ZEIT",
"SPIELSTAETTE",
]
].to_dict(
orient="records"
)
for t in teams:
if t["MANNSCHAFT"] not in find_duplicates:
teams_in_competition[(art, klasse, staffel)].append(t)
find_duplicates.append(t["MANNSCHAFT"])
geocode_result = gmaps.geocode(
f"{t['GEB_VEREIN']} {t['MANNSCHAFT']} {t['SPIELSTAETTE']}"
)
latitude = 0
longitude = 0
if len(geocode_result) > 0:
location = geocode_result[0]["geometry"]["location"]
latitude = location["lat"]
longitude = location["lng"]
t["LATITUDE"] = latitude
t["LONGITUDE"] = longitude
competitions_dict[(art, klasse, staffel)]["teams"] = teams_in_competition[
(art, klasse, staffel)
]
competitions_dict[(art, klasse, staffel)]["nTeams"] = len(
teams_in_competition[(art, klasse, staffel)]
)
competitions_dict[(art, klasse, staffel)]["art"] = art
competitions_dict[(art, klasse, staffel)]["klasse"] = klasse
competitions_dict[(art, klasse, staffel)]["staffel"] = staffel
competitions_dict_list_keys = {str(k): v for k, v in competitions_dict.items()}
with open("data/sachsen.json", "w", encoding="utf-8") as f:
json.dump(
competitions_dict_list_keys, f, ensure_ascii=False, indent=4, default=str
)
# %%
from math import sqrt, sin, cos, atan2, pi
def degreesToRadians(degrees):
"""Convert degrees to radians"""
return degrees * pi / 180
def distanceInKmByGPS(lat1, lon1, lat2, lon2):
"""Calculate the distance between two points in km"""
earthRadiusKm = 6371
dLat = degreesToRadians(lat2 - lat1)
dLon = degreesToRadians(lon2 - lon1)
lat1 = degreesToRadians(lat1)
lat2 = degreesToRadians(lat2)
a = sin(dLat / 2) * sin(dLat / 2) + sin(dLon / 2) * sin(dLon / 2) * cos(lat1) * cos(
lat2
)
c = 2 * atan2(sqrt(a), sqrt(1 - a))
return int(earthRadiusKm * c)
# %%
with open("data/sachsen.json", "r", encoding="utf-8") as f:
competitions = json.load(f)
competitions = {ast.literal_eval(k): v for k, v in competitions.items()}
# STAFFELN PRO ART UND KLASSE
# ('Herren', 'Landesliga') 1
# ('Herren', 'Landesklasse') 3
# ('Frauen', 'Landesliga') 1
# ('Frauen', 'Landesklasse') 3
# ('A-Junioren', 'Landesliga') 1
# ('A-Junioren', 'Landesklasse') 4
# ('Herren', 'Kreisoberliga') 13
# ('Herren', '1.Kreisliga (A)') 19
# ('Herren', '2.Kreisliga (B)') 8
# ('Herren', '3.Kreisliga (C)') 1
# ('Herren', '1.Kreisklasse') 21
# ('Herren', '2.Kreisklasse') 9
# ('A-Junioren', 'Kreisoberliga') 10
# ('A-Junioren', '1.Kreisliga (A)') 6
# ('Frauen', 'Kreisoberliga') 4
# ('Frauen', '1.Kreisliga (A)') 1
# ('Frauen', '1.Kreisklasse') 3
# ('B-Junioren', 'Landesliga') 1
# ('B-Junioren', 'Landesklasse') 4
# ('B-Junioren', 'Kreisoberliga') 13
# ('B-Junioren', '1.Kreisliga (A)') 13
# ('B-Junioren', '1.Kreisklasse') 1
# ('C-Junioren', 'Landesliga') 1
# ('C-Junioren', 'Landesklasse') 4
# ('C-Junioren', 'Kreisoberliga') 16
# ('C-Junioren', '1.Kreisliga (A)') 15
# ('C-Junioren', '1.Kreisklasse') 9
# ('D-Junioren', 'Landesliga') 1
# ('D-Junioren', 'Landesklasse') 6
# ('D-Junioren', 'Kreisoberliga') 16
# ('D-Junioren', '1.Kreisliga (A)') 24
# ('D-Junioren', '2.Kreisliga (B)') 8
# ('D-Junioren', '3.Kreisliga (C)') 2
# ('D-Junioren', '1.Kreisklasse') 33
# ('D-Junioren', '2.Kreisklasse') 10
# ('B-Juniorinnen', 'Landesliga') 1
# ('B-Juniorinnen', 'Landesklasse') 2
# ('C-Juniorinnen', 'Landesklasse') 3
# ('D-Juniorinnen', 'Kreisoberliga') 1
# ('Herren Ü35', 'Kreisoberliga') 4
# ('Herren Ü35', '1.Kreisliga (A)') 3
# ('Herren Ü35', '1.Kreisklasse') 3
# ('Herren Ü35', '2.Kreisklasse') 1
# ('Herren Ü40', '1.Kreisliga (A)') 5
# ('Herren Ü40', '1.Kreisklasse') 1
# ('Herren Ü50', '1.Kreisliga (A)') 1
# ('Herren Ü50', '1.Kreisklasse') 1
# ('Freizeitsport', '1.Kreisliga (A)') 3
# ('Freizeitsport', '1.Kreisklasse') 2
some_colors = [
"red",
"blue",
"green",
"yellow",
"purple",
"orange",
"pink",
"brown",
"black",
"white",
"gray",
"cyan",
"magenta",
"lime",
"indigo",
"violet",
"turquoise",
"gold",
"silver",
"beige",
"maroon",
"olive",
"navy",
"teal",
"coral",
"lavender",
"salmon",
"chocolate",
"crimson",
"aqua",
"ivory",
"khaki",
"plum",
"orchid",
"peru",
"tan",
"tomato",
"wheat",
"azure",
"mint",
"apricot",
"chartreuse",
"amber",
"fuchsia",
"jade",
"ruby",
"amethyst",
"rose",
"sapphire",
"cerulean",
"moss",
"denim",
"copper",
"peach",
"sand",
"pearl",
"mulberry",
"lemon",
"cream",
"ocher",
"brass",
"eggplant",
"cinnamon",
"mustard",
"rust",
"sienna",
"sepia",
"umber",
"limegreen",
"seagreen",
"forestgreen",
"dodgerblue",
"mediumslateblue",
"royalblue",
"firebrick",
"darkolivegreen",
"midnightblue",
"darkturquoise",
"lightcoral",
"palevioletred",
"hotpink",
"deeppink",
"darkkhaki",
"lightseagreen",
"darkslategray",
"slategray",
"lightsteelblue",
"skyblue",
"lightblue",
"powderblue",
"darkorange",
"lightsalmon",
"indianred",
"thistle",
"burlywood",
"mediumaquamarine",
"mediumorchid",
"mediumvioletred",
"papayawhip",
"moccasin",
"bisque",
"blanchedalmond",
"antiquewhite",
"mistyrose",
"lavenderblush",
"linen",
"snow",
"honeydew",
"palegreen",
"lightcyan",
"aliceblue",
"ghostwhite",
"whitesmoke",
"gainsboro",
]
latitude = 51.18292980165227
longitude = 13.11435805600463
gmap = GoogleMapPlotter(
latitude, longitude, 8, apikey="AIzaSyAPzFyMk3ZA0kL9TUlJ_kpV_IY56uBwdrc"
)
def random_color():
return "#{:06x}".format(random.randint(0, 0xFFFFFF))
competition_details = {}
color = None
for staffel, attr in competitions.items():
if (staffel[0], staffel[1]) != ("Herren", "Kreisoberliga"):
# if (staffel[0], staffel[1]) != ('Herren', '1.Kreisklasse'):
continue
competitions[staffel]["distance"] = []
if (staffel[0], staffel[1]) not in competition_details:
competition_details[(staffel[0], staffel[1])] = {
"nStaffeln": 1,
"nTeams": 0,
"previous_distances": [],
"teams": [],
}
color = some_colors.pop(0)
else:
competition_details[(staffel[0], staffel[1])]["nStaffeln"] += 1
color = some_colors.pop(0)
latitudes = []
longitudes = []
markers_text = []
for team1 in attr["teams"]:
competition_details[(staffel[0], staffel[1])]["nTeams"] += 1
competition_details[(staffel[0], staffel[1])]["teams"].append(team1)
for team2 in attr["teams"]:
distance = 0
if team1["MANNSCHAFT"] != team2["MANNSCHAFT"]:
distance = distanceInKmByGPS(
team1["LATITUDE"],
team1["LONGITUDE"],
team2["LATITUDE"],
team2["LONGITUDE"],
)
competition_details[(staffel[0], staffel[1])][
"previous_distances"
].append(distance)
competitions[staffel]["distance"].append(distance)
latitudes.append(team1["LATITUDE"])
longitudes.append(team1["LONGITUDE"])
markers_text.append(f"{team1['MANNSCHAFT']} @{team1['SPIELSTAETTE']}")
# Plot the points on the map
gmap.scatter(latitudes, longitudes, color=color, size=40, marker=False)
for (lat1, lon1), (lat2, lon2) in itertools.combinations(
zip(latitudes, longitudes), 2
):
gmap.plot([lat1, lat2], [lon1, lon2], color=color, edge_width=2)
for lat, lon, text in zip(latitudes, longitudes, markers_text):
gmap.marker(lat, lon, title=text.replace('"', ""), color=color)
print(color, staffel, attr["nTeams"], sum(attr["distance"]))
for competition, details in competition_details.items():
print(
competition,
details["nStaffeln"],
details["nTeams"],
sum(details["previous_distances"]),
)
# Optionally, draw a line path connecting the points
# gmap.plot(latitudes, longitudes, color='blue', edge_width=2.5)
# Save the map to an HTML file
gmap.draw("map_previous.html")
# %%
# for key, value in competition_details.items():
# print(key,value['nStaffeln'])
# %%
"""" GENERATE ALL DISTANCES BETWEEN TEAMS """
distance_between_teams = {}
for competition, details in competition_details.items():
print(f"Calculating distances for {competition}")
for team1 in details["teams"]:
distance_between_teams[team1["MANNSCHAFT"]] = {}
for team2 in details["teams"]:
distance = 0
if team1["MANNSCHAFT"] != team2["MANNSCHAFT"]:
distance = distanceInKmByGPS(
team1["LATITUDE"],
team1["LONGITUDE"],
team2["LATITUDE"],
team2["LONGITUDE"],
)
distance_between_teams[team1["MANNSCHAFT"]][team2["MANNSCHAFT"]] = distance
for comp, attr in competition_details.items():
teams = attr["teams"]
print(teams)
"""" RECLUSTERING THE COMPETITION INTO DIVISIONS """
# from pulp import (
# LpVariable,
# LpProblem,
# LpMinimize,
# lpSum,
# LpStatus,
# value,
# LpInteger,
# XPRESS,
# )
# model = LpProblem("Cluster", LpMinimize)
# """ x = 1 if team i is in same division as j, 0 otherwise """
# x = {}
# for team1 in teams:
# for team2 in teams:
# x[(team1["MANNSCHAFT"], team2["MANNSCHAFT"])] = LpVariable(
# f"team_{team1['MANNSCHAFT']}_{team2['MANNSCHAFT']}",
# lowBound=0,
# upBound=1,
# cat=LpInteger,
# )
# """ g = 1 if team i is i group j, 0 otherwise """
# groups = range(1,20)
# g = {}
# for team in teams:
# for group in groups:
# g[(team["MANNSCHAFT"], group)] = LpVariable(
# f"team_{team['MANNSCHAFT']}_{group}",
# lowBound=0,
# upBound=1,
# cat=LpInteger,
# )
# """ Each team is in exactly one division """
# for team1 in teams:
# model += lpSum(g[(team["MANNSCHAFT"], group)] for group in groups) == 1
# """ Each team is in same divisin as itself """
# for team in teams:
# model += x[(team["MANNSCHAFT"], team["MANNSCHAFT"])] == 1
# """ Each team is in same division with at least 14 and at most 16 other teams"""
# for team1 in teams:
# model += (
# lpSum(
# x[(team1["MANNSCHAFT"], team2["MANNSCHAFT"])] for team2 in teams
# )
# >= 14
# )
# model += (
# lpSum(
# x[(team1["MANNSCHAFT"], team2["MANNSCHAFT"])] for team2 in teams
# )
# <= 16
# )
# """ no more than 16 teams in a division """
# for group in groups:
# model += lpSum(g[(team["MANNSCHAFT"], group)] for team in teams) <= 16
# """ if team1 and team2 are paired, than they are in the same division """
# for group in groups:
# for team1 in teams:
# for team2 in teams:
# if team1["MANNSCHAFT"] != team2["MANNSCHAFT"]:
# model += x[(team1["MANNSCHAFT"], team2["MANNSCHAFT"])] + g[(team1["MANNSCHAFT"], group)] <= 1 + g[(team2["MANNSCHAFT"], group)]
# model += x[(team1["MANNSCHAFT"], team2["MANNSCHAFT"])] + g[(team2["MANNSCHAFT"], group)] <= 1 + g[(team1["MANNSCHAFT"], group)]
# """ symmetry constraint """
# for t1, t2 in x.keys():
# model += x[(t1, t2)] == x[(t2, t1)]
# """ MINIMIZE THE TRAVEL DISTANCE """
# model += lpSum(
# distance_between_teams[team1["MANNSCHAFT"]][team2["MANNSCHAFT"]]
# * x[(team1["MANNSCHAFT"], team2["MANNSCHAFT"])]
# for team1 in teams
# for team2 in teams
# )
# model.solve(XPRESS(msg=1, gapRel=0.5))
# new_cluster = 1
# for group in groups:
# for team in teams:
# if value(g[(team['MANNSCHAFT'], group)]) > 0.9:
# print(f"TEAM {team['MANNSCHAFT']} - {group}")
# # for team1 in teams:
# # for team2 in teams:
# # if value(x[(team1["MANNSCHAFT"], team2["MANNSCHAFT"])]) == 1:
# # print(
# # f"SAME CLUSTER {team1['MANNSCHAFT']} - {team2['MANNSCHAFT']} - {value(g[(team1["MANNSCHAFT"], group)])}"
# # )
# # some_colors = [
# # "red",
# # "blue",
# # "green",
# # "yellow",
# # "purple",
# # "orange",
# # "pink",
# # "brown",
# # "black",
# # "white",
# # "gray",
# # "cyan",
# # "magenta",
# # "lime",
# # "indigo",
# # "violet",
# # "turquoise",
# # "gold",
# # "silver",
# # "beige",
# # "maroon",
# # "olive",
# # "navy",
# # "teal",
# # "coral",
# # "lavender",
# # "salmon",
# # "chocolate",
# # "crimson",
# # "aqua",
# # "ivory",
# # "khaki",
# # "plum",
# # "orchid",
# # "peru",
# # "tan",
# # "tomato",
# # "wheat",
# # "azure",
# # "mint",
# # "apricot",
# # "chartreuse",
# # "amber",
# # "fuchsia",
# # "jade",
# # "ruby",
# # "amethyst",
# # "rose",
# # "sapphire",
# # "cerulean",
# # "moss",
# # "denim",
# # "copper",
# # "peach",
# # "sand",
# # "pearl",
# # "mulberry",
# # "lemon",
# # "cream",
# # "ocher",
# # "brass",
# # "eggplant",
# # "cinnamon",
# # "mustard",
# # "rust",
# # "sienna",
# # "sepia",
# # "umber",
# # "limegreen",
# # "seagreen",
# # "forestgreen",
# # "dodgerblue",
# # "mediumslateblue",
# # "royalblue",
# # "firebrick",
# # "darkolivegreen",
# # "midnightblue",
# # "darkturquoise",
# # "lightcoral",
# # "palevioletred",
# # "hotpink",
# # "deeppink",
# # "darkkhaki",
# # "lightseagreen",
# # "darkslategray",
# # "slategray",
# # "lightsteelblue",
# # "skyblue",
# # "lightblue",
# # "powderblue",
# # "darkorange",
# # "lightsalmon",
# # "indianred",
# # "thistle",
# # "burlywood",
# # "mediumaquamarine",
# # "mediumorchid",
# # "mediumvioletred",
# # "papayawhip",
# # "moccasin",
# # "bisque",
# # "blanchedalmond",
# # "antiquewhite",
# # "mistyrose",
# # "lavenderblush",
# # "linen",
# # "snow",
# # "honeydew",
# # "palegreen",
# # "lightcyan",
# # "aliceblue",
# # "ghostwhite",
# # "whitesmoke",
# # "gainsboro",
# # ]
# # latitude = 51.18292980165227
# # longitude = 13.11435805600463
# # gmap = GoogleMapPlotter(
# # latitude, longitude, 8, apikey="AIzaSyAPzFyMk3ZA0kL9TUlJ_kpV_IY56uBwdrc"
# # )
# # # Plot the points on the map
# # gmap.scatter(latitudes, longitudes, color=color, size=40, marker=False)
# # for (lat1, lon1), (lat2, lon2) in itertools.combinations(
# # zip(latitudes, longitudes), 2
# # ):
# # gmap.plot([lat1, lat2], [lon1, lon2], color=color, edge_width=2)
# # for lat, lon, text in zip(latitudes, longitudes, markers_text):
# # gmap.marker(lat, lon, title=text.replace('"', ""), color=color)
# %%