Python Forum

Full Version: tic tac toe python minimax alpha beta
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi everyone I have this python code which works but is not optimal . Its a tic tac toe using minimax and alpha beta pruning .the computer is not selecting the third x and winning if it has the chance to but is placing it another spot which makes no sense . Can someone help me find out where my error is . Should i check winner before calling max node?
here is my code https://pastebin.com/z8mqnCeW

import turtle
import copy
import random
 
screen = turtle.Screen()
screen.setup(800,800)
screen.title("Tic Tac Toe ")
screen.setworldcoordinates(-5,-5,5,5)
screen.bgcolor('light gray')
screen.tracer(0,0)
turtle.hideturtle()
 
def draw_board():
    turtle.pencolor('green')
    turtle.pensize(10)
    turtle.up()
    turtle.goto(-3,-1)
    turtle.seth(0)
    turtle.down()
    turtle.fd(6)
    turtle.up()
    turtle.goto(-3,1)
    turtle.seth(0)
    turtle.down()
    turtle.fd(6)
    turtle.up()
    turtle.goto(-1,-3)
    turtle.seth(90)
    turtle.down()
    turtle.fd(6)
    turtle.up()
    turtle.goto(1,-3)
    turtle.seth(90)
    turtle.down()
    turtle.fd(6)
 
def draw_circle(x,y):
    turtle.up()
    turtle.goto(x,y-0.75)
    turtle.seth(0)
    turtle.color('red')
    turtle.down()
    turtle.circle(0.75, steps=100)
 
def draw_x(x,y):
    turtle.color('blue')
    turtle.up()
    turtle.goto(x-0.75,y-0.75)
    turtle.down()
    turtle.goto(x+0.75,y+0.75)
    turtle.up()
    turtle.goto(x-0.75,y+0.75)
    turtle.down()
    turtle.goto(x+0.75,y-0.75)
 
def draw_piece(i,j,p):
    if p==0: return
    x,y = 2*(j-1), -2*(i-1)
    if p==1:
        draw_x(x,y)
    else:
        draw_circle(x,y)
 
def draw(b):
    draw_board()
    for i in range(3):
        for j in range(3):
            draw_piece(i,j,b[i][j])
    screen.update()
 
# return 1 if player 1 wins, 2 if player 2 wins, 3 if tie, 0 if game is not over
def gameover(b):
    if b[0][0]>0 and b[0][0] == b[0][1] and b[0][1] == b[0][2]: return b[0][0]
    if b[1][0]>0 and b[1][0] == b[1][1] and b[1][1] == b[1][2]: return b[1][0]
    if b[2][0]>0 and b[2][0] == b[2][1] and b[2][1] == b[2][2]: return b[2][0]
    if b[0][0]>0 and b[0][0] == b[1][0] and b[1][0] == b[2][0]: return b[0][0]
    if b[0][1]>0 and b[0][1] == b[1][1] and b[1][1] == b[2][1]: return b[0][1]
    if b[0][2]>0 and b[0][2] == b[1][2] and b[1][2] == b[2][2]: return b[0][2]
    if b[0][0]>0 and b[0][0] == b[1][1] and b[1][1] == b[2][2]: return b[0][0]
    if b[2][0]>0 and b[2][0] == b[1][1] and b[1][1] == b[0][2]: return b[2][0]
    p = 0
    for i in range(3):
        for j in range(3):
            p += (1 if b[i][j] > 0 else 0)
    if p==9: return 3
    else: return 0
 
def play(x,y):
    global turn
    if turn=='x': return
 
    i = 3-int(y+5)//2
    j = int(x+5)//2 - 1
    if i>2 or j>2 or i<0 or j<0 or b[i][j]!=0: return
    if turn == 'x': b[i][j], turn = 1, 'o'
    else: b[i][j], turn = 2, 'x'
    draw(b)
    r = gameover(b)
    if r==1:
        screen.textinput("Game over!","X won!")
    elif r==2:
        screen.textinput("Game over!","O won!")
    elif r==3:
        screen.textinput("Game over!", "Tie!")
    if r>0: turtle.bye()
    _,move = max_node(b,-2,2)
    b[move[0]][move[1]] = 1
    draw(b)
    r = gameover(b)
    if r==1:
        screen.textinput("Game over!","X won!")
    elif r==2:
        screen.textinput("Game over!","O won!")
    elif r==3:
        screen.textinput("Game over!", "Tie!")
    if r>0: turtle.bye()
    turn = 'o'
 
b = [ [ 0,0,0 ], [ 0,0,0 ], [ 0,0,0 ] ]    
draw(b)
turn = 'x'
screen.onclick(play)
#turtle.mainloop()
 
def max_node(b,alpha,beta):
    r = gameover(b)
    if r==1: return 1,None
    elif r==2: return -1,None
    elif r==3: return 0,None
 
    score = -2
    # find all possible next moves
    pm = list()
    for i in range(3):
        for j in range(3):
            if b[i][j] == 0: pm.append((i,j))
    random.shuffle(pm)
    for (i,j) in pm:
        if b[i][j] == 0:
            nb = copy.deepcopy(b)
            nb[i][j] = 1
            cs,_ = min_node(nb,alpha,beta)
            if score<cs:
                score=cs
                move = (i,j)
            alpha = max(alpha,cs)
            if alpha>=beta: return score,move
    return score,move
 
def min_node(b,alpha,beta):
    r = gameover(b)
    if r==1: return 1,None
    elif r==2: return -1,None
    elif r==3: return 0,None
 
    score = 2
    # find all possible next moves
    pm = list()
    random.shuffle(pm)
    for i in range(3):
        for j in range(3):
            if b[i][j] == 0: pm.append((i,j))
    for (i,j) in pm:
        if b[i][j] == 0:
            nb = copy.deepcopy(b)
            nb[i][j] = 2
            cs,_ = max_node(nb,alpha,beta)
            if score>cs:
                score=cs
                move = (i,j)
            beta = min(beta,cs)
            if alpha>=beta: return score,move
    return score,move
 
_,move = max_node(b,-2,2)
b[move[0]][move[1]] = 1
draw(b)
turn = 1
screen.mainloop()    
my code is more than 25 lines so i posted from pastebin
https://pastebin.com/z8mqnCeW
sorry i cant get the code in proper format so i am attaching it[attachment=2660]
There is no limit to the length of code that can be placed within bbcode tags.
Many, including myself will not open attachments or links.
The easiest way is to highlight your code, then click the python icon above.
import turtle
import copy
import random
  
screen = turtle.Screen()
screen.setup(800,800)
screen.title("Tic Tac Toe ")
screen.setworldcoordinates(-5,-5,5,5)
screen.bgcolor('light gray')
screen.tracer(0,0)
turtle.hideturtle()
  
def draw_board():
    turtle.pencolor('green')
    turtle.pensize(10)
    turtle.up()
    turtle.goto(-3,-1)
    turtle.seth(0)
    turtle.down()
    turtle.fd(6)
    turtle.up()
    turtle.goto(-3,1)
    turtle.seth(0)
    turtle.down()
    turtle.fd(6)
    turtle.up()
    turtle.goto(-1,-3)
    turtle.seth(90)
    turtle.down()
    turtle.fd(6)
    turtle.up()
    turtle.goto(1,-3)
    turtle.seth(90)
    turtle.down()
    turtle.fd(6)
  
def draw_circle(x,y):
    turtle.up()
    turtle.goto(x,y-0.75)
    turtle.seth(0)
    turtle.color('red')
    turtle.down()
    turtle.circle(0.75, steps=100)
  
def draw_x(x,y):
    turtle.color('blue')
    turtle.up()
    turtle.goto(x-0.75,y-0.75)
    turtle.down()
    turtle.goto(x+0.75,y+0.75)
    turtle.up()
    turtle.goto(x-0.75,y+0.75)
    turtle.down()
    turtle.goto(x+0.75,y-0.75)
  
def draw_piece(i,j,p):
    if p==0: return
    x,y = 2*(j-1), -2*(i-1)
    if p==1:
        draw_x(x,y)
    else:
        draw_circle(x,y)
  
def draw(b):
    draw_board()
    for i in range(3):
        for j in range(3):
            draw_piece(i,j,b[i][j])
    screen.update()
  
# return 1 if player 1 wins, 2 if player 2 wins, 3 if tie, 0 if game is not over
def gameover(b):
    if b[0][0]>0 and b[0][0] == b[0][1] and b[0][1] == b[0][2]: return b[0][0]
    if b[1][0]>0 and b[1][0] == b[1][1] and b[1][1] == b[1][2]: return b[1][0]
    if b[2][0]>0 and b[2][0] == b[2][1] and b[2][1] == b[2][2]: return b[2][0]
    if b[0][0]>0 and b[0][0] == b[1][0] and b[1][0] == b[2][0]: return b[0][0]
    if b[0][1]>0 and b[0][1] == b[1][1] and b[1][1] == b[2][1]: return b[0][1]
    if b[0][2]>0 and b[0][2] == b[1][2] and b[1][2] == b[2][2]: return b[0][2]
    if b[0][0]>0 and b[0][0] == b[1][1] and b[1][1] == b[2][2]: return b[0][0]
    if b[2][0]>0 and b[2][0] == b[1][1] and b[1][1] == b[0][2]: return b[2][0]
    p = 0
    for i in range(3):
        for j in range(3):
            p += (1 if b[i][j] > 0 else 0)
    if p==9: return 3
    else: return 0
  
def play(x,y):
    global turn
    if turn=='x': return
  
    i = 3-int(y+5)//2
    j = int(x+5)//2 - 1
    if i>2 or j>2 or i<0 or j<0 or b[i][j]!=0: return
    if turn == 'x': b[i][j], turn = 1, 'o'
    else: b[i][j], turn = 2, 'x'
    draw(b)
    r = gameover(b)
    if r==1:
        screen.textinput("Game over!","X won!")
    elif r==2:
        screen.textinput("Game over!","O won!")
    elif r==3:
        screen.textinput("Game over!", "Tie!")
    if r>0: turtle.bye()
    _,move = max_node(b,-2,2)
    b[move[0]][move[1]] = 1
    draw(b)
    r = gameover(b)
    if r==1:
        screen.textinput("Game over!","X won!")
    elif r==2:
        screen.textinput("Game over!","O won!")
    elif r==3:
        screen.textinput("Game over!", "Tie!")
    if r>0: turtle.bye()
    turn = 'o'
  
b = [ [ 0,0,0 ], [ 0,0,0 ], [ 0,0,0 ] ]    
draw(b)
turn = 'x'
screen.onclick(play)
#turtle.mainloop()
  
def max_node(b,alpha,beta):
    r = gameover(b)
    if r==1: return 1,None
    elif r==2: return -1,None
    elif r==3: return 0,None
  
    score = -2
    # find all possible next moves
    pm = list()
    for i in range(3):
        for j in range(3):
            if b[i][j] == 0: pm.append((i,j))
    random.shuffle(pm)
    for (i,j) in pm:
        if b[i][j] == 0:
            nb = copy.deepcopy(b)
            nb[i][j] = 1
            cs,_ = min_node(nb,alpha,beta)
            if score<cs:
                score=cs
                move = (i,j)
            alpha = max(alpha,cs)
            if alpha>=beta: return score,move
    return score,move
  
def min_node(b,alpha,beta):
    r = gameover(b)
    if r==1: return 1,None
    elif r==2: return -1,None
    elif r==3: return 0,None
  
    score = 2
    # find all possible next moves
    pm = list()
    random.shuffle(pm)
    for i in range(3):
        for j in range(3):
            if b[i][j] == 0: pm.append((i,j))
    for (i,j) in pm:
        if b[i][j] == 0:
            nb = copy.deepcopy(b)
            nb[i][j] = 2
            cs,_ = max_node(nb,alpha,beta)
            if score>cs:
                score=cs
                move = (i,j)
            beta = min(beta,cs)
            if alpha>=beta: return score,move
    return score,move
  
_,move = max_node(b,-2,2)
b[move[0]][move[1]] = 1
draw(b)
turn = 1
screen.mainloop()    
hope this works
Here's an example of how you can modify your max_node function:
def max_node(board):
    if gameover(board) == 1:  # Check if the current state is a winning state for the computer
        return 1  # Assign a high score for a winning move
    elif gameover(board) == 2:  # Check if the current state is a winning state for the opponent
        return -1  # Assign a low score for a losing move
    elif gameover(board) == 3:  # Check if the current state is a tie
        return 0  # Assign a neutral score for a tie
    else:
        best_score = float('-inf')
        for move in get_possible_moves(board):
            new_board = copy.deepcopy(board)
            make_move(new_board, move, 1)
            score = min_node(new_board)
            best_score = max(best_score, score)
        return best_score
By adding this check before returning the score, the max_node function will prioritize winning moves and select them when available. This should resolve the issue you mentioned.
I played some games and the code appears to work just fine. One thing you might want to consider is returning a higher score for wins that occur in fewer moves. Your minimax code scores every win as 1 regardless of how many moves it takes to win. While I was playing the minmax algorithm occasionally passed on moves that would win immediately, picking other moves that were still guaranteed wins, but for the next move.

Below is a minimax algorithm that gives higher scores for wins that occur sooner. In addition to giving more weight to moves that win faster, I changed the board from a 2D list to a flat list. Just because a tic-tac-toe board has rows and columns is no reason why the software has to save the board in a 2D container. Iterating and indexing are easier with a flat list. Making a new board is easier with a flat list. Checking for wins is even easier with a flat list. The only time the software has to know about rows and columns is when it draws X's and O's on the board, and when the user clicks on the board to place their mark. In both cases additional transformations are required to get to/from turtle coordinates.
import turtle
import random

wins = ((0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7), (2, 5, 8), (0, 4, 8), (2, 4, 6))
board = [''] * 9
ROBOT = 'X'
HUMAN = 'O'
DRAW = 2


def line(start, end):
    """Draw a line from start to end."""
    turtle.up()
    turtle.goto(*start)
    turtle.down()
    turtle.goto(*end)


def draw_marker(square, marker):
    """Draw an x or o."""
    x = square % 3 + 0.5
    y = square // 3 + 0.5
    if marker == ROBOT:
        turtle.color('blue')
        line((x-0.4, y-0.4), (x+0.4, y+0.4))
        line((x-0.4, y+0.4), (x+0.4, y-0.4))
    elif marker == HUMAN:
        turtle.color('red')
        turtle.up()
        turtle.goto(x, y-0.4)
        turtle.seth(0)
        turtle.down()
        turtle.circle(0.4, steps=100)


def draw():
    """Draw the board."""
    turtle.pencolor('green')
    turtle.pensize(10)
    line((0, 1), (3, 1))
    line((0, 2), (3, 2))
    line((1, 0), (1, 3))
    line((2, 0), (2, 3))
    for square, marker in enumerate(board):
        draw_marker(square, marker)
    screen.update()


def game_over():
    """Return marker if marker won, 2 if draw, else 0."""
    for win in wins:
        a, b, c = [board[square] for square in win]
        if a and a == b == c:
            return a
    if '' not in board:
        return DRAW
    return 0


def open_squares():
    """Return list of squares that are open."""
    squares = [index for index, player in enumerate(board) if not player]
    random.shuffle(squares)  # Shuffle to make robot player less predictable.
    return squares


def minmax(marker=ROBOT, alpha=-11, beta=11, weight=10):
    """Use min-max algorithm to pick best move for robot player."""
    if (winner := game_over()) == ROBOT:
        return weight, None
    elif winner == HUMAN:
        return -weight, None
    elif winner == DRAW:
        return 0, None
    
    if marker == ROBOT:
        best = -weight, None
        other = HUMAN
    else:
        best = weight, None
        other = ROBOT

    for square in open_squares():
        board[square] = marker
        score, _ = minmax(other, alpha, beta, weight-1)
        board[square] = ""

        if score > best[0] and marker == ROBOT:
            best = score, square
            alpha = max(alpha, score)
        elif score < best[0] and marker == HUMAN:
            best = score, square
            beta = min(beta, score)
        if alpha >= beta:
                break
    return best


def play(square, marker):
    """Place marker in square.  Return game_over status."""
    board[square] = marker
    draw()
    return game_over()


def click(x, y):
    """Respond to mouse click.

    If player clicked on open square, place o marker and have robot play x.
    Display message if game over.
    """
    square = None
    if 0 <= x < 3 and 0 <= y < 3:
        square = int(x) + int(y) * 3
    if square is None or board[square]:
        return

    winner = play(square, HUMAN)
    if not winner:
        winner = play(minmax()[1], ROBOT)

    if winner:
        """Display results if game over."""
        if winner == DRAW:
            screen.textinput("Game over!", "Tie!")
        else:
            screen.textinput("Game over!", f"{winner} won!")
        turtle.bye()


screen = turtle.Screen()
screen.title("Tic Tac Toe ")
screen.setup(800, 800)
screen.setworldcoordinates(-0.2, 3.2, 3.2, -0.2)
screen.tracer(0, 0)
turtle.hideturtle()
screen.onclick(click)

# Robot makes first move.
play(minmax()[1], ROBOT)
screen.mainloop()