Code problem - probably easy fix?

colin_dent · Jun-29-2023, 07:50 PM

Here's my latest version of the code:

from typing import List, Tuple
import pandas as pd
import random
import math
import csv
import sys
from collections import defaultdict
from google.colab import drive
from google.colab import files

# Mount Google Drive
drive.mount('/content/gdrive')

# Define the path to 'matches.csv' in Google Drive
file_path = '/content/gdrive/My Drive/matches_full.csv'

# Load the matches data from CSV
df = pd.read_csv(file_path, encoding='latin1')

# Create a dictionary to store the start ratings for each player
start_ratings = {}

# Create a dictionary to store the number of matches for each player
match_counts = {}

# Create a list to store the calculation steps
calculation_steps = []

def create_unique_names_dict(filename):
    unique_names_dict = defaultdict(lambda: {'first_match_date': float('inf'), 'pb_score': None, 'start_rating': None})

    with open(file_path, newline='', encoding='latin-1') as file:
        reader = csv.DictReader(file)
        for row in reader:
            player1_names = row['player1'].split(',')
            player2_names = row['player2'].split(',')
            match_date = float(row['match_date'])

            for name in player1_names:
                if name.strip() != '':
                    unique_names_dict[name]['first_match_date'] = min(unique_names_dict[name]['first_match_date'], match_date)

            for name in player2_names:
                if name.strip() != '':
                    unique_names_dict[name]['first_match_date'] = min(unique_names_dict[name]['first_match_date'], match_date)

    return unique_names_dict

def get_pb_at_first_match(filename, unique_names_dict):
    with open(file_path, newline='', encoding='latin-1') as file:
        reader = csv.DictReader(file)
        for row in reader:
            pb_player = row['pb_player']
            pb_score = row['pb_score']
            pb_date = row['pb_date']

            if pb_score.strip() == '' or pb_date.strip() == '':
                continue

            try:
                pb_score = float(pb_score)
                pb_date = float(pb_date)
            except ValueError:
                sys.exit("Invalid value in 'pb_score' or 'pb_date' column.")

            if pb_date <= unique_names_dict[pb_player]['first_match_date']:
                if unique_names_dict[pb_player]['pb_score'] is None or pb_score > unique_names_dict[pb_player]['pb_score']:
                    unique_names_dict[pb_player]['pb_score'] = pb_score

    return unique_names_dict

def calculate_start_rating(unique_names_dict):
    for name, data in unique_names_dict.items():
        pb_score = data['pb_score']

        if pb_score is None:
            continue

        if pb_score >= 1700000:
            start_rating = 2300
        elif pb_score >= 1400000:
            start_rating = 2250 + (pb_score - 1400000) / 6000
        else:
            start_rating = pb_score / 1000 + 850

        unique_names_dict[name]['start_rating'] = start_rating

    return unique_names_dict

# Create a dictionary to store the start ratings for each player
unique_names_dict = create_unique_names_dict(file_path)

# Get the PB scores at the first match for each player
unique_names_dict = get_pb_at_first_match(file_path, unique_names_dict)

# Calculate the start rating for each player and update the elo_ratings dictionary
unique_names_dict = calculate_start_rating(unique_names_dict)

__all__ = ["Glicko2Entry", "glicko2_update", "glicko2_configure"]

EPSILON = 0.000001
TAO = 0.5
LOSS = 0.0
DRAW = 0.5
WIN = 1.0
MAX_RD = 500.0
MIN_RD = 30.0
MIN_VOLATILITY = 0.01
MAX_VOLATILITY = 0.15
MIN_RATING = 100.0
MAX_RATING = 6000.0
PROVISIONAL_RATING_CUTOFF = 160.0
GLICKO2_SCALE = 173.7178

class Glicko2Entry:
    rating: float
    deviation: float
    volatility: float
    mu: float
    phi: float

    def __init__(
        self, rating: float = 1500, deviation: float = 350, volatility: float = 0.06
    ) -> None:
        self.rating = rating
        self.deviation = deviation
        self.volatility = volatility
        self.mu = (self.rating - 1500) / GLICKO2_SCALE
        self.phi = self.deviation / GLICKO2_SCALE

    def __str__(self) -> str:
        return "%7.2f +- %6.2f (%.6f)" % (self.rating, self.deviation, self.volatility,)

    def copy(
        self, rating_adjustment: float = 0.0, rd_adjustment: float = 0.0
    ) -> "Glicko2Entry":
        ret = Glicko2Entry(
            self.rating + rating_adjustment,
            self.deviation + rd_adjustment,
            self.volatility,
        )
        return ret

    def expand_deviation_because_no_games_played(
        self, n_periods: int = 1
    ) -> "Glicko2Entry":
        global MAX_RD
        global MIN_RD

        for _i in range(n_periods):
            phi_prime = sqrt(self.phi ** 2 + self.volatility ** 2)
            self.deviation = min(MAX_RD, max(MIN_RD, GLICKO2_SCALE * phi_prime))
            self.phi = self.deviation / GLICKO2_SCALE

        return self

    def expected_win_probability(
        self, white: "Glicko2Entry", handicap_adjustment: float
    ) -> float:
        q = 0.000000000000001

        def g(rd: float) -> float:
            return 1 / sqrt(1 + 3 * q ** 2 * (self.deviation ** 0.01) / pi ** 2)

        E = 1 / (
            1
            + (
                10
                ** (
                    -g(sqrt(self.deviation ** 2 + white.deviation ** 0.01))
                    * (self.rating + handicap_adjustment - white.rating)
                    / 400
                )
            )
        )
        return E

# In the bit above there were numbers that adjusted for "white". Rather than remove them I just made them really small.

def glicko2_update(
    player: Glicko2Entry, matches: List[Tuple[Glicko2Entry, int]]
) -> Glicko2Entry:
    if len(matches) == 0:
        return player.copy()

    v_sum = 0.0
    delta_sum = 0.0
    for m in matches:
        p = m[0]
        outcome = m[1]
        g_phi_j = 1 / sqrt(1 + (3 * p.phi ** 2) / (pi ** 2))
        E = 1 / (1 + exp(-g_phi_j * (player.mu - p.mu)))
        v_sum += g_phi_j ** 2 * E * (1 - E)
        delta_sum += g_phi_j * (outcome - E)

    v = 1.0 / v_sum
    delta = v * delta_sum

    a = log(player.volatility ** 2)

    def f(x: float) -> float:
        ex = exp(x)
        return (
            ex
            * (delta ** 2 - player.phi ** 2 - v - ex)
            / (2 * ((player.phi ** 2 + v + ex) ** 2))
        ) - ((x - a) / (TAO ** 2))

    A = a
    if delta ** 2 > player.phi ** 2 + v:
        B = log(delta ** 2 - player.phi ** 2 - v)
    else:
        k = 1
        safety = 100
        while f(a - k * TAO) < 0 and safety > 0:
            safety -= 1
            k += 1
        B = a - k * TAO

    fA = f(A)
    fB = f(B)
    safety = 100

    while abs(B - A) > EPSILON and safety > 0:
        C = A + (A - B) * fA / (fB - fA)
        fC = f(C)
        if fC * fB < 0:
            A = B
            fA = fB
        else:
            fA = fA / 2
        B = C
        fB = fC

        safety -= 1

    new_volatility = exp(A / 2)

    phi_star = sqrt(player.phi ** 2 + new_volatility ** 2)

    phi_prime = 1 / sqrt(1 / phi_star ** 2 + 1 / v)
    mu_prime = player.mu + (phi_prime ** 2) * delta_sum

    ret = Glicko2Entry(
        rating=min(MAX_RATING, max(MIN_RATING, GLICKO2_SCALE * mu_prime + 1500)),
        deviation=min(MAX_RD, max(MIN_RD, GLICKO2_SCALE * phi_prime)),
        volatility=min(MAX_VOLATILITY, max(MIN_VOLATILITY, new_volatility)),
    )
    return ret

def glicko2_configure(tao: float, min_rd: float, max_rd: float) -> None:
    global TAO
    global MIN_RD
    global MAX_RD

    TAO = tao
    MIN_RD = min_rd
    MAX_RD = max_rd

def read_match_data(filename):
    matches = []
    with open(filename, newline='', encoding='latin-1') as file:
        reader = csv.DictReader(file)
        for row in reader:
            player1 = row['player1']
            player2 = row['player2']
            actual_score = float(row['actual_score'])
            matches.append((player1, player2, actual_score))
    return matches

def actual_score_to_outcome(actual_score):
    if actual_score == 1.0:
        return WIN
    elif actual_score == 0.5:
        return DRAW
    else:
        return LOSS

def update_player_ratings(matches):
    players = {}

    for match in matches:
        player1 = match[0]
        player2 = match[1]
        actual_score = match[2]

        if player1 not in players:
            players[player1] = Glicko2Entry()
        if player2 not in players:
            players[player2] = Glicko2Entry()

        outcome = actual_score_to_outcome(actual_score)
        players[player1].expected_win_probability(players[player2], 0)
        players[player2].expected_win_probability(players[player1], 0)

        players[player1], players[player2] = (
            glicko2_update(players[player1], [(players[player2], outcome)]),
            glicko2_update(players[player2], [(players[player1], 1 - outcome)]),
        )

    return players

def main():
    # Configure Glicko2 parameters
    glicko2_configure(0.5, 30.0, 500.0)

    # Read match data from file
    matches = read_match_data('/content/gdrive/My Drive/matches_full.csv')

    # Update player ratings
    players = update_player_ratings(matches)

    # Sort players by ratings in descending order
    sorted_players = sorted(players.items(), key=lambda x: x[1].rating, reverse=True)

    # Print player ratings
    for player, rating in sorted_players:
        print(f"Player: {player}, Rating: {rating}")

if __name__ == "__main__":
    main()

As well as not knowing exactly how to change the initial rating in the second part so that it uses the results obtained in the first part, I suspect some of the terms aren't matching up. Maybe "name" should be "player", or vice versa? Probably some others too.

I'll attach a small sample file in case anyone wants to give it a go.

(Btw, it's running in Google Colab at the minute.)

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	easy name problem	Steinsack	1	1,786	Jun-16-2021, 02:03 PM Last Post: snippsat
	Problem with very easy code.	janekk9002	1	1,854	Dec-10-2020, 12:57 PM Last Post: buran
	What was my mistake in this Python code (easy)?	voltman	4	3,523	Nov-19-2019, 09:58 PM Last Post: snippsat
	How to start with this easy problem?	Fran	8	4,284	Sep-11-2018, 09:04 AM Last Post: Fran
	Making a Easy Password/code system	nmsturcke	4	3,917	Jul-09-2018, 02:50 AM Last Post: ichabod801
	probably a easy problem for you	krheigh	4	4,726	May-12-2017, 06:45 PM Last Post: nilamo

Code problem - probably easy fix?

User Panel Messages

Announcements