Spaces:

phongdtd
/

WC2022_predictor

Build error

WC2022_predictor / ml /predictor.py

phong.dao

init app

9e6c24e over 3 years ago

13.4 kB

	import os.path
	from operator import itemgetter
	from typing import Text, Tuple

	import numpy as np
	import pandas as pd
	import requests

	from configs.config import cfg
	from configs.constants import DATA_ROOT
	from ml.model import MLModel
	from ml.utils import load_pickle

	from datetime import tzinfo, timedelta, datetime

	# Shared zero offset returned by the UTC tzinfo below.
	ZERO = timedelta(0)


	class UTC(tzinfo):
	    """
	    Fixed-offset ``tzinfo`` implementation for Coordinated Universal Time.

	    Every method ignores its ``dt`` argument: the offset is always zero,
	    there is no daylight-saving adjustment and the zone name is ``"UTC"``.
	    """

	    def utcoffset(self, dt):
	        """
	        :param dt: datetime being localised (unused)
	        :return: zero ``timedelta`` (UTC has no offset from itself)
	        """
	        return ZERO

	    def tzname(self, dt):
	        """
	        :param dt: datetime being localised (unused)
	        :return: the literal zone name ``"UTC"``
	        """
	        return "UTC"

	    def dst(self, dt):
	        """
	        :param dt: datetime being localised (unused)
	        :return: zero ``timedelta`` (no daylight-saving time is applied)
	        """
	        return ZERO


	class Predictor:
	    """
	    A match predictor using ML

	    Combines a data frame of past matches (``base_df``) with a classifier
	    exposing ``predict_proba`` (``model``) to predict single matches and to
	    simulate a whole tournament (group stage followed by knockout rounds).

	    ``base_df`` must provide the columns ``home_team`` and ``away_team`` plus,
	    for each side, the statistic columns listed in ``_STAT_COLUMNS``.
	    """

	    # Knockout rounds, in the order they are simulated.
	    _KNOCKOUT_ROUNDS = ("Round of 16", "Quarter-Final", "Semi-Final", "Final")

	    # Column-name templates read by ``find_stats``; ``%s`` becomes "home" or
	    # "away". The order defines the positional layout that ``find_features``
	    # consumes, so it must not be changed independently of that method.
	    _STAT_COLUMNS = (
	        "rank_%s",
	        "%s_goals_mean",
	        "%s_goals_mean_l5",
	        "%s_goals_suf_mean",
	        "%s_goals_suf_mean_l5",
	        "%s_rank_mean",
	        "%s_rank_mean_l5",
	        "%s_game_points_rank_mean",
	        "%s_game_points_rank_mean_l5",
	    )

	    def __init__(self, base_df: pd.DataFrame, model: "MLModel"):
	        """
	        :param base_df: historical matches with per-side statistic columns
	        :param model: classifier whose ``predict_proba`` takes a list of
	            feature rows and returns, per row, two probabilities
	        """
	        self.model = model
	        self.base_df = base_df

	    def find_stats(self, team):
	        """
	        Read the statistics of ``team`` from its last row in ``base_df``.

	        :param team: Name of the team, eg: Qatar, etc.
	        :return: list of nine values, ordered as in ``_STAT_COLUMNS``, taken
	            from the home or the away columns depending on which side the
	            team was on in that row
	        :raises IndexError: if the team appears in no row of ``base_df``
	        """
	        played = self.base_df[
	            (self.base_df["home_team"] == team) | (self.base_df["away_team"] == team)
	        ]
	        if len(played) == 0:
	            # Same exception type the bare ``.values[0]`` lookup would raise,
	            # but with a message that names the offending team.
	            raise IndexError("No match found in base_df for team %r" % (team,))

	        last_game = played.tail(1)
	        side = "home" if last_game["home_team"].values[0] == team else "away"
	        return [last_game[column % side].values[0] for column in self._STAT_COLUMNS]

	    @staticmethod
	    def find_features(team_1, team_2):
	        """
	        Build the model's feature row for ``team_1`` against ``team_2``.

	        :param team_1: statistics of the first team, as returned by ``find_stats``
	        :param team_2: statistics of the second team, same layout
	        :return: list of twelve features: ten differences between the two
	            teams followed by the constants ``1`` and ``0``
	        """
	        rank_dif = team_1[0] - team_2[0]
	        goals_dif = team_1[1] - team_2[1]
	        goals_dif_l5 = team_1[2] - team_2[2]
	        goals_suf_dif = team_1[3] - team_2[3]
	        goals_suf_dif_l5 = team_1[4] - team_2[4]
	        # Goals mean scaled by the "rank mean" statistic (index 5) of each team.
	        goals_per_ranking_dif = (team_1[1] / team_1[5]) - (team_2[1] / team_2[5])
	        dif_rank_agst = team_1[5] - team_2[5]
	        dif_rank_agst_l5 = team_1[6] - team_2[6]
	        dif_gp_rank = team_1[7] - team_2[7]
	        dif_gp_rank_l5 = team_1[8] - team_2[8]

	        return [
	            rank_dif,
	            goals_dif,
	            goals_dif_l5,
	            goals_suf_dif,
	            goals_suf_dif_l5,
	            goals_per_ranking_dif,
	            dif_rank_agst,
	            dif_rank_agst_l5,
	            dif_gp_rank,
	            dif_gp_rank_l5,
	            # NOTE(review): fixed trailing features; presumably a one-hot
	            # encoded categorical from training - confirm against the
	            # training pipeline before changing.
	            1,
	            0,
	        ]

	    def __predict(self, team_1: Text, team_2: Text):
	        """
	        Query the model for both orderings of a pairing.

	        The model is asked once with ``team_1`` listed first ("g1") and once
	        with ``team_2`` listed first ("g2"). Column 0 of each answer is read
	        as the probability of the first-listed team, column 1 as that of the
	        second-listed team.

	        :param team_1: name of the first team
	        :param team_2: name of the second team
	        :return: tuple ``(team_1_prob_g1, team_1_prob_g2, team_1_prob,
	            team_2_prob, team_2_prob_g1, team_2_prob_g2)`` where the
	            un-suffixed values are the means over both orderings
	        """
	        team_1_stat = self.find_stats(team_1)
	        team_2_stat = self.find_stats(team_2)

	        probs_g1 = self.model.predict_proba(
	            [self.find_features(team_1_stat, team_2_stat)]
	        )
	        probs_g2 = self.model.predict_proba(
	            [self.find_features(team_2_stat, team_1_stat)]
	        )

	        team_1_prob_g1 = probs_g1[0][0]
	        team_2_prob_g1 = probs_g1[0][1]
	        # In the second ordering the teams are swapped, so are the columns.
	        team_2_prob_g2 = probs_g2[0][0]
	        team_1_prob_g2 = probs_g2[0][1]

	        team_1_prob = (team_1_prob_g1 + team_1_prob_g2) / 2
	        team_2_prob = (team_2_prob_g2 + team_2_prob_g1) / 2

	        return (
	            team_1_prob_g1,
	            team_1_prob_g2,
	            team_1_prob,
	            team_2_prob,
	            team_2_prob_g1,
	            team_2_prob_g2,
	        )

	    def _decide(self, team_1, team_2):
	        """
	        Decide the outcome of a match where a draw is possible.

	        :return: ``(draw, winner, winner_proba, team_1_prob, team_2_prob)``;
	            ``winner`` is ``""`` and ``winner_proba`` is ``0.0`` on a draw
	        """
	        (
	            team_1_prob_g1,
	            team_1_prob_g2,
	            team_1_prob,
	            team_2_prob,
	            team_2_prob_g1,
	            team_2_prob_g2,
	        ) = self.__predict(team_1, team_2)

	        # The two orderings disagree when each of them favours a different team.
	        orderings_disagree = (
	            team_1_prob_g1 > team_2_prob_g1 and team_2_prob_g2 > team_1_prob_g2
	        ) or (team_1_prob_g1 < team_2_prob_g1 and team_2_prob_g2 < team_1_prob_g2)

	        if not orderings_disagree:
	            if team_1_prob > team_2_prob:
	                return False, team_1, team_1_prob, team_1_prob, team_2_prob
	            if team_2_prob > team_1_prob:
	                return False, team_2, team_2_prob, team_1_prob, team_2_prob

	        # Either the orderings disagree or neither team is ahead. The latter
	        # used to be reported as "not a draw" with an empty winner; it is a draw.
	        return True, "", 0.0, team_1_prob, team_2_prob

	    def predict(self, team_1: Text, team_2: Text) -> Tuple[bool, Text, float]:
	        """
	        Predict the outcome of a single match.

	        :param team_1: name of the first team
	        :param team_2: name of the second team
	        :return: ``(draw, winner, winner_proba)``; on a draw ``winner`` is
	            ``""`` and ``winner_proba`` is ``0.0``
	        """
	        draw, winner, winner_proba, _, _ = self._decide(team_1, team_2)
	        return draw, winner, winner_proba

	    @staticmethod
	    def _close_group(table, group):
	        """
	        Finalise one group: rank it and report the standings.

	        :return: ``(lines, qualifiers)`` - the report lines and the names of
	            the first- and second-placed teams
	        """
	        rows = table[group]
	        for row in rows:
	            # Collapse the per-match probabilities into their mean, which
	            # serves as the tiebreaker behind the points.
	            row[2] = np.mean(row[2])

	        standings = sorted(rows, key=itemgetter(1, 2), reverse=True)
	        lines = ["\n", "Group %s advanced: \n" % group]
	        lines.extend("%s -------- %d\n" % (row[0], row[1]) for row in standings)
	        return lines, [standings[0][0], standings[1][0]]

	    def _simulate_group_stage(self, table, matches):
	        """
	        Play every group match, updating ``table`` in place.

	        :param table: maps a group name to rows ``[team, points, probs]``
	        :param matches: entries ``[group, team_1, team_2]``; matches of one
	            group are expected to be contiguous
	        :return: ``(lines, advanced)`` - the report lines and, per group, the
	            ``[first, second]`` qualifiers in group order
	        """
	        lines, advanced = [], []
	        last_group = None

	        for teams in matches:
	            group, team_1, team_2 = teams[0], teams[1], teams[2]
	            draw, winner, winner_proba, team_1_prob, team_2_prob = self._decide(
	                team_1, team_2
	            )

	            for row in table[group]:
	                if draw:
	                    if row[0] == team_1 or row[0] == team_2:
	                        row[1] += 1
	                elif row[0] == winner:
	                    row[1] += 3
	                # adding tiebreaker (probs per game)
	                if row[0] == team_1:
	                    row[2].append(team_1_prob)
	                if row[0] == team_2:
	                    row[2].append(team_2_prob)

	            if group != last_group:
	                if last_group is not None:
	                    closed_lines, qualifiers = self._close_group(table, last_group)
	                    lines.extend(closed_lines)
	                    advanced.append(qualifiers)
	                lines.append("\n")
	                lines.append(
	                    "-" * 10
	                    + " Starting Analysis for Group %s " % (group)
	                    + "-" * 10
	                    + "\n"
	                )

	            if draw:
	                lines.append("Group %s - %s vs. %s: Draw\n" % (group, team_1, team_2))
	            else:
	                lines.append(
	                    "Group %s - %s vs. %s: Winner %s with %.2f probability\n"
	                    % (group, team_1, team_2, winner, winner_proba)
	                )
	            last_group = group

	        # The last group is never followed by a group change, so close it here.
	        # Skipped when there were no matches at all.
	        if last_group is not None:
	            closed_lines, qualifiers = self._close_group(table, last_group)
	            lines.extend(closed_lines)
	            advanced.append(qualifiers)

	        return lines, advanced

	    def _simulate_knockout(self, advanced):
	        """
	        Play the knockout rounds starting from the group qualifiers.

	        :param advanced: per group, the ``[first, second]`` qualifiers
	        :return: report lines for all knockout rounds
	        """
	        group_count = len(advanced)
	        # Seeding walks the group list twice. On the first pass even slots take
	        # the group winner and odd slots the runner-up; the second pass inverts
	        # this, so neighbouring groups are cross-paired (1st vs 2nd).
	        contenders = [
	            group[0] if (slot < group_count) == (slot % 2 == 0) else group[1]
	            for slot, group in enumerate(advanced * 2)
	        ]

	        lines = []
	        for round_name in self._KNOCKOUT_ROUNDS:
	            # Pair neighbours; a trailing unpaired contender is dropped.
	            games = [
	                (contenders[c], contenders[c + 1])
	                for c in range(0, len(contenders) - 1, 2)
	            ]
	            contenders = []
	            # The report has always ended "Round of 16" lines with "\n" and
	            # later rounds with " \n"; kept so the output stays byte-identical.
	            line_end = "\n" if round_name == "Round of 16" else " \n"

	            for index, (home, away) in enumerate(games):
	                probabilities = self.__predict(home, away)
	                team_1_prob, team_2_prob = probabilities[2], probabilities[3]

	                if index == 0:
	                    lines.append("-" * 10 + "\n")
	                    lines.append("Starting simulation of %s\n" % round_name)
	                    lines.append("-" * 10 + "\n")

	                # No draws in the knockout stage: ties go to the first team.
	                if team_1_prob < team_2_prob:
	                    survivor, survivor_prob = away, team_2_prob
	                else:
	                    survivor, survivor_prob = home, team_1_prob

	                lines.append(
	                    "%s vs. %s: %s advances with prob %.2f"
	                    % (home, away, survivor, survivor_prob)
	                    + line_end
	                )
	                contenders.append(survivor)

	        return lines

	    def predict_all_matches(self) -> Text:
	        """
	        Predict all the matches in the tournament

	        Loads the group table and fixture list from the configured data file,
	        simulates the group stage and then the knockout rounds, prints the
	        resulting report and returns it.

	        :return: the full textual report
	        """
	        # NOTE(review): load_pickle presumably unpickles this file; it must
	        # only ever point at a trusted, project-owned data file.
	        data = load_pickle(os.path.join(DATA_ROOT, cfg.data.table_matches))

	        lines, advanced = self._simulate_group_stage(data["table"], data["matches"])
	        lines.extend(self._simulate_knockout(advanced))

	        result = "".join(lines)
	        print(result)
	        return result