

This work is a snippet reconstructed from coursework for 95-891: Introduction to Artificial Intelligence at Carnegie Mellon University.

<aside> ⚠️

This work has NOT been built from the ground up. The partially prepared ‘skeleton code’ was provided by the Teaching Team of 95-891.

</aside>

import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

# 4x4 Tic-Tac-Toe board: 0 = empty, 1 = player 1, 2 = player 2
class TicTacToe4x4:
    def __init__(self):
        self.board = np.zeros((4, 4), dtype=int)
        
    def reset(self):
        self.board = np.zeros((4, 4), dtype=int)
        
    def is_draw(self):
        return not np.any(self.board == 0)  # No zeros left on the board
    
    def is_winner(self, player):
        # Check rows, columns, and diagonals for a win
        for i in range(4):
            if np.all(self.board[i, :] == player) or np.all(self.board[:, i] == player):
                return True
                
        if np.all(np.diag(self.board) == player) or np.all(np.diag(np.fliplr(self.board)) == player):
            return True
        
        return False
    
    def available_moves(self):
        return np.argwhere(self.board == 0)
    
    def make_move(self, move, player):
        if self.board[move[0], move[1]] == 0:
            self.board[move[0], move[1]] = player
            return True
        return False

    def display(self):
        print(self.board)
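
As a quick sanity check (my own illustrative snippet, not part of the provided skeleton), the class can be exercised directly:

game = TicTacToe4x4()
for col in range(4):
    game.make_move((0, col), 1)  # player 1 fills the top row
game.display()
print(game.is_winner(1))  # True: a completed row wins
print(game.is_draw())     # False: empty cells remain
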
def random_player(board, player):
    moves = board.available_moves()
    return random.choice(moves)

def play_game():
    board = TicTacToe4x4()
    current_player = 1
    while True:
        move = random_player(board, current_player)
        board.make_move(move, current_player)
        if board.is_winner(current_player):
            return current_player
        if board.is_draw():
            return 0  # Draw
        current_player = 3 - current_player  # Switch between 1 and 2
        
# Play the game 100 times and keep track of the results
results = []
for _ in range(100):
    results.append(play_game())

# Compute the average win rate for the first player over the last fifty games
average_win_rates = []
for i in range(len(results)):
    last_fifty = results[max(0, i-49):i+1]
    average_win_rates.append(last_fifty.count(1) / len(last_fifty))
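
The same trailing averages can be computed in vectorized form; this is an equivalent sketch using np.convolve (it reproduces the loop above):

wins = (np.array(results) == 1).astype(float)
window = 50
# Trailing-window win counts via convolution (implicit zero-padding at the start)
counts = np.convolve(wins, np.ones(window))[:len(wins)]
# Each position divides by how many games are actually in its window
denoms = np.minimum(np.arange(1, len(wins) + 1), window)
average_win_rates_np = counts / denoms  # matches average_win_rates element-wise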

# Plotting the results
plt.figure(figsize=(12, 6))
plt.plot(average_win_rates, marker='o', linestyle='-')
plt.xlabel('Game Number')
plt.ylabel('Average Win Rate (Last Fifty Games)')
plt.title('Average Win Rate of the First Player Over the Last 50 Games')
plt.grid(True)
plt.show()

[Figure: average win rate of the first player over the last 50 games]

The win rate of Player 1 stabilized around 25%, likely because many games between two random players on a 4x4 board end in draws, leaving each player winning well under half of the games.

# MCTS Node Class
class MCTSNode:
    def __init__(self, game_state, parent=None, move=None):
        self.game_state = game_state
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.value = 0.0

    # Check if a node is fully expanded
    def is_fully_expanded(self):
        return len(self.children) == len(self.game_state.available_moves())

    # Select the best child based on the UCT (UCB1) formula. The 1e-7 terms avoid dividing by 0
    def best_child(self, exploration_weight=1.0):
        scores = [
            (child.value / (child.visits + 1e-7) +
             exploration_weight * np.sqrt(np.log(self.visits) / (child.visits + 1e-7)))
            for child in self.children
        ]
        return self.children[np.argmax(scores)]

    # Simulate a playthrough to the end of the game with both players making random moves.
    # Play on a copy of the board so the node's stored state is not mutated, and check for a win
    # right after each move (including the move that created this node).
    def rollout(self):
        current_board = TicTacToe4x4()
        current_board.board = self.game_state.board.copy()
        last_player = current_board.board[self.move[0], self.move[1]]
        while True:
            if current_board.is_winner(last_player):
                return 1 if last_player == 1 else 0
            if current_board.is_draw():
                return 0.5
            last_player = 3 - last_player
            move = random_player(current_board, last_player)
            current_board.make_move(move, last_player)

    # Backpropagate the result of a simulated playthrough through the ancestors of a node
    def backpropagate(self, value):
        self.visits += 1
        self.value += value
        if self.parent:
            self.parent.backpropagate(value)
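
For reference, the selection rule implemented in best_child is the standard UCT score: UCT(child) = value / visits + c * sqrt(ln(parent visits) / visits), where c is the exploration_weight. A larger c shifts the search toward less-visited children; c = 0 picks greedily by average value.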
            
# Below is the skeleton provided by the teaching team, with the parts originally marked by *** filled in
# MCTS Algorithm Class
class MCTS:
    # initialize the node's exploration_weight
    def __init__(self, exploration_weight=1.0):
        self.exploration_weight = exploration_weight

    # Search for the best move over n_simulations simulations
    def search(self, initial_state, n_simulations=1000):
        # Initialize the root to the initial state
        root = MCTSNode(initial_state)
        # For each simulation (up to n_simulations)
        for _ in range(n_simulations):
            # Call select on the root node
            node = self._select(root)
            # If the node's game state is a draw or a win for either player, this simulation is done
            if node.game_state.is_draw() or node.game_state.is_winner(1) or node.game_state.is_winner(2):
                continue
            # Otherwise expand the node to add one of its children
            node = self._expand(node)
            # Set the reward to the result of playing out the game from the child node
            reward = node.rollout()
            # Backpropagate the reward from the child node (this also updates the child's visits and value)
            node.backpropagate(reward)
        # Choose the final move greedily, without the exploration bonus
        return root.best_child(exploration_weight=0.0).move

    # Selection: while the node is fully expanded, descend to its best child;
    # stop at the first node that still has untried moves (or at a childless terminal node)
    def _select(self, node):
        while node.is_fully_expanded():
            if node.children:
                # Move to the best child, given the exploration weight
                node = node.best_child(self.exploration_weight)
            else:
                # Fully expanded but childless: a terminal state with no available moves
                return node
        return node
    # Expansion: add one child node for a randomly chosen untried move
    def _expand(self, node):
        tried = set(child.move for child in node.children)
        untried_moves = [tuple(m) for m in node.game_state.available_moves() if tuple(m) not in tried]
        move = random.choice(untried_moves)
        child_game_state = TicTacToe4x4()
        child_game_state.board = node.game_state.board.copy()
        # The player to move is the opponent of whoever made this node's move (player 1 at the root)
        player = 3 - node.game_state.board[node.move[0], node.move[1]] if node.move else 1
        child_game_state.make_move(move, player)
        child_node = MCTSNode(child_game_state, parent=node, move=move)
        node.children.append(child_node)
        return child_node
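
As an illustrative one-off call (my own example; the chosen move will vary from run to run):

board = TicTacToe4x4()
best_first_move = MCTS(exploration_weight=1.0).search(board, n_simulations=200)
print(best_first_move)  # a (row, col) tuple such as (1, 2)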

# Player Functions (random_player is defined above)
def mcts_player(board, player, exploration_weight=1.0):
    mcts = MCTS(exploration_weight=exploration_weight)
    return mcts.search(board)
    
# Game Playing Loop with MCTS for Player X
def play_game_mcts(exploration_weight=1.0):
    board = TicTacToe4x4()
    current_player = 1
    while True:
        if current_player == 1:
            # Choose the move using the mcts_player function with the given exploration weight
            move = mcts_player(board, current_player, exploration_weight)
        else:
            move = random_player(board, current_player)
        board.make_move(move, current_player)
        if board.is_winner(current_player):
            return current_player
        if board.is_draw():
            return 0
        current_player = 3 - current_player
        
# Running the Experiment
results_mcts = []
for _ in range(100):
    results_mcts.append(play_game_mcts())

average_win_rates_mcts = []
for i in range(len(results_mcts)):
    last_fifty = results_mcts[max(0, i-49):i+1]
    # Append the average win rates over the last fifty games assigning 1 for a win and .5 for a draw
    average_win_rates_mcts.append((last_fifty.count(1) / len(last_fifty)) + (last_fifty.count(0)*0.5 / len(last_fifty)))

# Plotting the Results
plt.figure(figsize=(12, 6))
plt.plot(average_win_rates_mcts, marker='o', linestyle='-', color='blue', label='Player X using MCTS')
plt.xlabel('Game Number')
plt.ylabel('Average Win Rate (Last 50 Games)')
plt.title('Average Win Rate of Player X (using MCTS) Over the Last 50 Games')
plt.grid(True)
plt.legend()
plt.show()

[Figure: average win rate of Player X (using MCTS) over the last 50 games]

The MCTS player substantially outperforms the random player: its average win rate (counting a draw as 0.5) settles around 55-65%, more than double the random baseline of roughly 25%.

# Perform some experiments to find the optimal value of the exploration_weight in best_child
# Exploration for Optimal Exploration Weight
exploration_weights = np.linspace(0.1, 2, 20)  # Test values from 0.1 to 2, 20 points in total
win_rates_for_weights = []

# For each weight in exploration_weights
#   initialize results_for_weight to be empty
#   For each of 100 games
#      call MCTS with the current exploration weight
#      append the results of play_game_mcts to the current results_for_weight
#   calculate the win rate across all results_for_weight, assigning 1 for a win and .5 for a draw
#   Append the win rate for the current weight to win_rates_for_weights
for weight in exploration_weights:
    results_for_weight = []
    print(weight)  # printed once per weight to confirm which value is being tested
    for _ in range(100):
        # Pass the weight through to the MCTS player; assigning MCTS.exploration_weight as a
        # class attribute has no effect, because __init__ sets an instance attribute that shadows it
        results_for_weight.append(play_game_mcts(exploration_weight=weight))
    win_rate_for_weight = (results_for_weight.count(1) + (results_for_weight.count(0) * 0.5)) / len(results_for_weight)
    win_rates_for_weights.append(win_rate_for_weight)

# Plotting the results
plt.figure(figsize=(12, 6))
plt.plot(exploration_weights, win_rates_for_weights, marker='o', linestyle='-')
plt.xlabel('Exploration Weight')
plt.ylabel('Win Rate for Player X (MCTS)')
plt.title('Win Rate of Player X using MCTS for Various Exploration Weights')
plt.grid(True)
plt.show()

optimal_weight = exploration_weights[np.argmax(win_rates_for_weights)]
print(optimal_weight)

[Figure: win rate of Player X using MCTS for various exploration weights]

Optimal weight printed: 0.1

Experiments with varying the exploration constant c showed that c = 0.1 returned the highest win rate for Player 1.

This result was followed by c = 1.1, which makes more theoretical sense given the UCT/UCB1 formula, where values around sqrt(2) ≈ 1.41 are the usual textbook choice.

A finer sweep of weights (to two decimal places) might yield a better-tuned value.
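
Such a follow-up could look like the sketch below (my own illustration; it has not been run, so no optimum is reported here):

# Hypothetical finer grid around the best coarse value of 0.1
fine_weights = np.round(np.arange(0.05, 0.31, 0.01), 2)
fine_win_rates = []
for weight in fine_weights:
    games = [play_game_mcts(exploration_weight=weight) for _ in range(100)]
    fine_win_rates.append((games.count(1) + 0.5 * games.count(0)) / len(games))
print(fine_weights[np.argmax(fine_win_rates)])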