Jun-29-2023, 07:50 PM
Here's my latest version of the code:
I'll attach a small sample file in case anyone wants to give it a go.
(Btw, it's running in Google Colab at the minute.)
"""Seed Glicko-2 ratings from personal-best (PB) scores, then rate a match file.

Part one scans the matches CSV to find every player's first match date and the
best PB score they had on or before that date, and turns that PB into a start
rating.  Part two replays the matches through Glicko-2, creating each player
with their start rating (or the Glicko-2 default of 1500 when none is known).
"""
import csv
import math
import random
import sys
from collections import defaultdict
from math import exp, log, pi, sqrt
from typing import Dict, List, Mapping, Optional, Tuple

import pandas as pd

try:
    from google.colab import drive
    from google.colab import files
except ImportError:
    # Not running inside Google Colab: there is no Drive to mount.
    drive = None
    files = None

# Path to the matches CSV inside the mounted Google Drive.
file_path = '/content/gdrive/My Drive/matches_full.csv'

# Start rating for each player, filled in by main().
start_ratings = {}

# Number of matches for each player.
match_counts = {}

# Calculation steps.
calculation_steps = []

__all__ = [
    "Glicko2Entry",
    "glicko2_update",
    "glicko2_configure",
    "create_unique_names_dict",
    "get_pb_at_first_match",
    "calculate_start_rating",
    "collect_start_ratings",
    "read_match_data",
    "actual_score_to_outcome",
    "update_player_ratings",
    "main",
]

EPSILON = 0.000001
TAO = 0.5
LOSS = 0.0
DRAW = 0.5
WIN = 1.0
MAX_RD = 500.0
MIN_RD = 30.0
MIN_VOLATILITY = 0.01
MAX_VOLATILITY = 0.15
MIN_RATING = 100.0
MAX_RATING = 6000.0
PROVISIONAL_RATING_CUTOFF = 160.0
GLICKO2_SCALE = 173.7178


def create_unique_names_dict(filename):
    """Return a dict of every player name found in *filename*.

    Each value is ``{'first_match_date', 'pb_score', 'start_rating'}``; only
    ``first_match_date`` (the smallest ``match_date`` the name appears in) is
    filled in here.  The ``player1`` / ``player2`` fields are split on commas
    and each name is stripped, so the keys never carry stray whitespace.

    Raises:
        ValueError: if a ``match_date`` cell is not a number.
    """
    unique_names_dict = defaultdict(
        lambda: {'first_match_date': float('inf'), 'pb_score': None, 'start_rating': None}
    )
    with open(filename, newline='', encoding='latin-1') as file:
        for row in csv.DictReader(file):
            match_date = float(row['match_date'])
            for column in ('player1', 'player2'):
                for raw_name in row[column].split(','):
                    name = raw_name.strip()
                    if not name:
                        continue
                    entry = unique_names_dict[name]
                    entry['first_match_date'] = min(entry['first_match_date'], match_date)
    return unique_names_dict


def get_pb_at_first_match(filename, unique_names_dict):
    """Record each player's best PB score dated on or before their first match.

    Reads the ``pb_player`` / ``pb_score`` / ``pb_date`` columns of *filename*
    and stores the highest qualifying score in
    ``unique_names_dict[player]['pb_score']``.  Rows with a blank player,
    score or date are skipped.  The (mutated) dict is returned.

    Exits the program via ``sys.exit`` if a score or date is not numeric.
    """
    with open(filename, newline='', encoding='latin-1') as file:
        for row in csv.DictReader(file):
            pb_player = row['pb_player'].strip()
            pb_score = row['pb_score']
            pb_date = row['pb_date']
            if pb_player == '' or pb_score.strip() == '' or pb_date.strip() == '':
                continue
            try:
                pb_score = float(pb_score)
                pb_date = float(pb_date)
            except ValueError:
                sys.exit("Invalid value in 'pb_score' or 'pb_date' column.")
            entry = unique_names_dict[pb_player]
            if pb_date <= entry['first_match_date']:
                if entry['pb_score'] is None or pb_score > entry['pb_score']:
                    entry['pb_score'] = pb_score
    return unique_names_dict


def calculate_start_rating(unique_names_dict):
    """Convert each player's PB score into a start rating, in place.

    * PB >= 1,700,000             -> 2300
    * 1,400,000 <= PB < 1,700,000 -> 2250 + (PB - 1,400,000) / 6000
    * otherwise                   -> PB / 1000 + 850

    Players without a PB score keep ``start_rating`` as it was.
    """
    for data in unique_names_dict.values():
        pb_score = data['pb_score']
        if pb_score is None:
            continue
        if pb_score >= 1700000:
            start_rating = 2300
        elif pb_score >= 1400000:
            start_rating = 2250 + (pb_score - 1400000) / 6000
        else:
            start_rating = pb_score / 1000 + 850
        data['start_rating'] = start_rating
    return unique_names_dict


def collect_start_ratings(unique_names_dict):
    """Return ``{name: start_rating}`` for every player that has a start rating."""
    return {
        name: data['start_rating']
        for name, data in unique_names_dict.items()
        if data['start_rating'] is not None
    }


class Glicko2Entry:
    """One player's Glicko-2 state on both the rating scale and the internal scale."""

    rating: float
    deviation: float
    volatility: float
    mu: float
    phi: float

    def __init__(
        self, rating: float = 1500, deviation: float = 350, volatility: float = 0.06
    ) -> None:
        self.rating = rating
        self.deviation = deviation
        self.volatility = volatility
        # Internal Glicko-2 scale: centred on 1500 and divided by GLICKO2_SCALE.
        self.mu = (self.rating - 1500) / GLICKO2_SCALE
        self.phi = self.deviation / GLICKO2_SCALE

    def __str__(self) -> str:
        return f"{self.rating:7.2f} +- {self.deviation:6.2f} ({self.volatility:.6f})"

    def copy(
        self, rating_adjustment: float = 0.0, rd_adjustment: float = 0.0
    ) -> "Glicko2Entry":
        """Return a new entry, optionally shifted in rating and/or deviation."""
        return Glicko2Entry(
            self.rating + rating_adjustment,
            self.deviation + rd_adjustment,
            self.volatility,
        )

    def expand_deviation_because_no_games_played(
        self, n_periods: int = 1
    ) -> "Glicko2Entry":
        """Grow the deviation for *n_periods* idle periods; mutates and returns self."""
        for _ in range(n_periods):
            phi_prime = sqrt(self.phi ** 2 + self.volatility ** 2)
            self.deviation = min(MAX_RD, max(MIN_RD, GLICKO2_SCALE * phi_prime))
            self.phi = self.deviation / GLICKO2_SCALE
        return self

    def expected_win_probability(
        self, white: "Glicko2Entry", handicap_adjustment: float
    ) -> float:
        """Return the expected score of self against *white*.

        ``q`` is deliberately tiny, so ``g`` is effectively 1 and the result
        depends only on the rating difference plus *handicap_adjustment*.
        """
        q = 0.000000000000001

        def g(rd: float) -> float:
            # The argument is ignored; the deviation term is scaled away by q.
            return 1 / sqrt(1 + 3 * q ** 2 * (self.deviation ** 0.01) / pi ** 2)

        E = 1 / (
            1
            + (
                10
                ** (
                    -g(sqrt(self.deviation ** 2 + white.deviation ** 0.01))
                    * (self.rating + handicap_adjustment - white.rating)
                    / 400
                )
            )
        )
        return E


# In the bit above there were numbers that adjusted for "white". Rather than remove them I just made them really small.


def glicko2_update(
    player: Glicko2Entry, matches: List[Tuple[Glicko2Entry, float]]
) -> Glicko2Entry:
    """Return *player*'s new entry after one rating period.

    Args:
        player: the entry to update (not modified).
        matches: ``(opponent, outcome)`` pairs, outcome being 1.0 / 0.5 / 0.0
            from *player*'s point of view.

    Returns:
        A new ``Glicko2Entry`` with rating, deviation and volatility clamped
        to the module limits.  With no matches an unchanged copy is returned.
    """
    if not matches:
        return player.copy()

    v_sum = 0.0
    delta_sum = 0.0
    for opponent, outcome in matches:
        g_phi_j = 1 / sqrt(1 + (3 * opponent.phi ** 2) / (pi ** 2))
        E = 1 / (1 + exp(-g_phi_j * (player.mu - opponent.mu)))
        v_sum += g_phi_j ** 2 * E * (1 - E)
        delta_sum += g_phi_j * (outcome - E)

    v = 1.0 / v_sum
    delta = v * delta_sum
    a = log(player.volatility ** 2)

    def f(x: float) -> float:
        ex = exp(x)
        return (
            ex
            * (delta ** 2 - player.phi ** 2 - v - ex)
            / (2 * ((player.phi ** 2 + v + ex) ** 2))
        ) - ((x - a) / (TAO ** 2))

    # Bracket the root of f in [A, B].
    A = a
    if delta ** 2 > player.phi ** 2 + v:
        B = log(delta ** 2 - player.phi ** 2 - v)
    else:
        k = 1
        safety = 100
        while f(a - k * TAO) < 0 and safety > 0:
            safety -= 1
            k += 1
        B = a - k * TAO

    fA = f(A)
    fB = f(B)
    safety = 100  # hard cap on iterations in case convergence stalls

    # Iterative root search on f (regula falsi with the "halve fA" step).
    while abs(B - A) > EPSILON and safety > 0:
        C = A + (A - B) * fA / (fB - fA)
        fC = f(C)
        if fC * fB < 0:
            A = B
            fA = fB
        else:
            fA = fA / 2
        B = C
        fB = fC
        safety -= 1

    new_volatility = exp(A / 2)
    phi_star = sqrt(player.phi ** 2 + new_volatility ** 2)
    phi_prime = 1 / sqrt(1 / phi_star ** 2 + 1 / v)
    mu_prime = player.mu + (phi_prime ** 2) * delta_sum

    return Glicko2Entry(
        rating=min(MAX_RATING, max(MIN_RATING, GLICKO2_SCALE * mu_prime + 1500)),
        deviation=min(MAX_RD, max(MIN_RD, GLICKO2_SCALE * phi_prime)),
        volatility=min(MAX_VOLATILITY, max(MIN_VOLATILITY, new_volatility)),
    )


def glicko2_configure(tao: float, min_rd: float, max_rd: float) -> None:
    """Override the module-wide TAO, MIN_RD and MAX_RD settings."""
    global TAO
    global MIN_RD
    global MAX_RD
    TAO = tao
    MIN_RD = min_rd
    MAX_RD = max_rd


def read_match_data(filename):
    """Return ``[(player1, player2, actual_score), ...]`` read from *filename*.

    Player names are stripped so they match the keys produced by
    ``create_unique_names_dict``.
    """
    matches = []
    with open(filename, newline='', encoding='latin-1') as file:
        for row in csv.DictReader(file):
            matches.append(
                (row['player1'].strip(), row['player2'].strip(), float(row['actual_score']))
            )
    return matches


def actual_score_to_outcome(actual_score):
    """Map a score cell to WIN (1.0), DRAW (0.5) or, for anything else, LOSS."""
    if actual_score == 1.0:
        return WIN
    if actual_score == 0.5:
        return DRAW
    return LOSS


def update_player_ratings(
    matches, initial_ratings: Optional[Mapping[str, float]] = None
) -> Dict[str, Glicko2Entry]:
    """Replay *matches* in order and return ``{player: Glicko2Entry}``.

    Args:
        matches: ``(player1, player2, actual_score)`` tuples; the score is
            from player1's point of view.
        initial_ratings: optional ``{player: start rating}``.  A player seen
            for the first time starts at this rating when present, otherwise
            at the ``Glicko2Entry`` default.
    """
    seeds = initial_ratings or {}
    players: Dict[str, Glicko2Entry] = {}
    for match in matches:
        player1, player2, actual_score = match[0], match[1], match[2]
        for name in (player1, player2):
            if name not in players:
                seed = seeds.get(name)
                players[name] = Glicko2Entry() if seed is None else Glicko2Entry(rating=seed)

        outcome = actual_score_to_outcome(actual_score)
        # Both updates must be computed from the pre-match entries.
        before1, before2 = players[player1], players[player2]
        players[player1] = glicko2_update(before1, [(before2, outcome)])
        players[player2] = glicko2_update(before2, [(before1, 1 - outcome)])
    return players


def main():
    """Mount Drive (in Colab), compute start ratings, rate all matches, print a table."""
    if drive is not None:
        drive.mount('/content/gdrive')

    # Configure Glicko2 parameters
    glicko2_configure(0.5, 30.0, 500.0)

    # Part one: first match date -> PB at that date -> start rating.
    unique_names_dict = create_unique_names_dict(file_path)
    unique_names_dict = get_pb_at_first_match(file_path, unique_names_dict)
    unique_names_dict = calculate_start_rating(unique_names_dict)
    start_ratings.update(collect_start_ratings(unique_names_dict))

    # Part two: replay the matches, seeding new players with their start rating.
    matches = read_match_data(file_path)
    players = update_player_ratings(matches, start_ratings)

    # Sort players by ratings in descending order
    sorted_players = sorted(players.items(), key=lambda x: x[1].rating, reverse=True)

    # Print player ratings
    for player, rating in sorted_players:
        print(f"Player: {player}, Rating: {rating}")


if __name__ == "__main__":
    main()
# As well as not knowing exactly how to change the initial rating in the second part so that it uses the results obtained in the first part, I suspect
some of the terms aren't matching up. Maybe "name" should be "player", or vice versa? Probably some others too.
I'll attach a small sample file in case anyone wants to give it a go.
(Btw, it's running in Google Colab at the minute.)
Attached Files
matches_full.csv (Size: 8.16 KB / Downloads: 30)