

This work is a snippet reconstructed from coursework for 95-891: Introduction to Artificial Intelligence at Carnegie Mellon University.

<aside> ⚠️

This work has NOT been built from the ground up. The partially prepared ‘skeleton code’ was provided by the Teaching Team of 95-891.

</aside>

import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

# 4x4 Tic-Tac-Toe board: 0 = empty, 1 = player 1, 2 = player 2
class TicTacToe4x4:
    def __init__(self):
        self.board = np.zeros((4, 4), dtype=int)
        
    def reset(self):
        self.board = np.zeros((4, 4), dtype=int)
        
    def is_draw(self):
        return not np.any(self.board == 0)  # No zeros left on the board
    
    def is_winner(self, player):
        # Check rows, columns, and diagonals for a win
        for i in range(4):
            if np.all(self.board[i, :] == player) or np.all(self.board[:, i] == player):
                return True
                
        if np.all(np.diag(self.board) == player) or np.all(np.diag(np.fliplr(self.board)) == player):
            return True
        
        return False
    
    def available_moves(self):
        return np.argwhere(self.board == 0)
    
    def make_move(self, move, player):
        if self.board[move[0], move[1]] == 0:
            self.board[move[0], move[1]] = player
            return True
        return False

    def display(self):
        print(self.board)
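
As a quick sanity check (my own illustrative snippet, not part of the provided skeleton), the class can be exercised directly:

game = TicTacToe4x4()
for col in range(4):
    game.make_move((0, col), 1)  # player 1 fills the top row
game.display()
print(game.is_winner(1))  # True: a completed row wins
print(game.is_draw())     # False: empty cells remain
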
def random_player(board, player):
    moves = board.available_moves()
    return random.choice(moves)

def play_game():
    board = TicTacToe4x4()
    current_player = 1
    while True:
        move = random_player(board, current_player)
        board.make_move(move, current_player)
        if board.is_winner(current_player):
            return current_player
        if board.is_draw():
            return 0  # Draw
        current_player = 3 - current_player  # Switch between 1 and 2
        
# Play the game 100 times and keep track of the results
results = []
for _ in range(100):
    results.append(play_game())

# Compute the average win rate for the first player over the last fifty games
average_win_rates = []
for i in range(len(results)):
    last_fifty = results[max(0, i-49):i+1]
    average_win_rates.append(last_fifty.count(1) / len(last_fifty))
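
The same trailing averages can be computed in vectorized form; this is an equivalent sketch using np.convolve (it reproduces the loop above):

wins = (np.array(results) == 1).astype(float)
window = 50
# Trailing-window win counts via convolution (implicit zero-padding at the start)
counts = np.convolve(wins, np.ones(window))[:len(wins)]
# Each position divides by how many games are actually in its window
denoms = np.minimum(np.arange(1, len(wins) + 1), window)
average_win_rates_np = counts / denoms  # matches average_win_rates element-wise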

# Plotting the results
plt.figure(figsize=(12, 6))
plt.plot(average_win_rates, marker='o', linestyle='-')
plt.xlabel('Game Number')
plt.ylabel('Average Win Rate (Last Fifty Games)')
plt.title('Average Win Rate of the First Player Over the Last 50 Games')
plt.grid(True)
plt.show()

[Figure: average win rate of the first player over the last 50 games]

The win rate of Player 1 stabilized around 25%, likely because many games between two random players on a 4x4 board end in draws, leaving each player winning well under half of the games.

# MCTS Node Class
class MCTSNode:
    def __init__(self, game_state, parent=None, move=None):
        self.game_state = game_state
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.value = 0.0

    # Check if a node is fully expanded
    def is_fully_expanded(self):
        return len(self.children) == len(self.game_state.available_moves())

    # Select the best child based on the UCT (UCB1) formula. The 1e-7 terms avoid dividing by 0
    def best_child(self, exploration_weight=1.0):
        scores = [
            (child.value / (child.visits + 1e-7) +
             exploration_weight * np.sqrt(np.log(self.visits) / (child.visits + 1e-7)))
            for child in self.children
        ]
        return self.children[np.argmax(scores)]

    # Simulate a playthrough to the end of the game with both players making random moves.
    # Play on a copy of the board so the node's stored state is not mutated, and check for a win
    # right after each move (including the move that created this node).
    def rollout(self):
        current_board = TicTacToe4x4()
        current_board.board = self.game_state.board.copy()
        last_player = current_board.board[self.move[0], self.move[1]]
        while True:
            if current_board.is_winner(last_player):
                return 1 if last_player == 1 else 0
            if current_board.is_draw():
                return 0.5
            last_player = 3 - last_player
            move = random_player(current_board, last_player)
            current_board.make_move(move, last_player)

    # Backpropagate the result of a simulated playthrough through the ancestors of a node
    def backpropagate(self, value):
        self.visits += 1
        self.value += value
        if self.parent:
            self.parent.backpropagate(value)
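
For reference, the selection rule implemented in best_child is the standard UCT score: UCT(child) = value / visits + c * sqrt(ln(parent visits) / visits), where c is the exploration_weight. A larger c shifts the search toward less-visited children; c = 0 picks greedily by average value.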
            
# Below is the skeleton provided by the teaching team, with the parts originally marked by *** filled in
# MCTS Algorithm Class
class MCTS:
    # initialize the node's exploration_weight
    def __init__(self, exploration_weight=1.0):
        self.exploration_weight = exploration_weight

    # Search for the best move over n_simulations simulations
    def search(self, initial_state, n_simulations=1000):
        # Initialize the root to the initial state
        root = MCTSNode(initial_state)
        # For each simulation (up to n_simulations)
        for _ in range(n_simulations):
            # Call select on the root node
            node = self._select(root)
            # If the node's game state is a draw or a win for either player, this simulation is done
            if node.game_state.is_draw() or node.game_state.is_winner(1) or node.game_state.is_winner(2):
                continue
            # Otherwise expand the node to add one of its children
            node = self._expand(node)
            # Set the reward to the result of playing out the game from the child node
            reward = node.rollout()
            # Backpropagate the reward from the child node (this also updates the child's visits and value)
            node.backpropagate(reward)
        # Choose the final move greedily, without the exploration bonus
        return root.best_child(exploration_weight=0.0).move

    # Selection: while the node is fully expanded, descend to its best child;
    # stop at the first node that still has untried moves (or at a childless terminal node)
    def _select(self, node):
        while node.is_fully_expanded():
            if node.children:
                # Move to the best child, given the exploration weight
                node = node.best_child(self.exploration_weight)
            else:
                # Fully expanded but childless: a terminal state with no available moves
                return node
        return node
    # Expansion: add one child node for a randomly chosen untried move
    def _expand(self, node):
        tried = set(child.move for child in node.children)
        untried_moves = [tuple(m) for m in node.game_state.available_moves() if tuple(m) not in tried]
        move = random.choice(untried_moves)
        child_game_state = TicTacToe4x4()
        child_game_state.board = node.game_state.board.copy()
        # The player to move is the opponent of whoever made this node's move (player 1 at the root)
        player = 3 - node.game_state.board[node.move[0], node.move[1]] if node.move else 1
        child_game_state.make_move(move, player)
        child_node = MCTSNode(child_game_state, parent=node, move=move)
        node.children.append(child_node)
        return child_node
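
As an illustrative one-off call (my own example; the chosen move will vary from run to run):

board = TicTacToe4x4()
best_first_move = MCTS(exploration_weight=1.0).search(board, n_simulations=200)
print(best_first_move)  # a (row, col) tuple such as (1, 2)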

# Player Functions (random_player is defined above)
def mcts_player(board, player, exploration_weight=1.0):
    mcts = MCTS(exploration_weight=exploration_weight)
    return mcts.search(board)
    
# Game Playing Loop with MCTS for Player X
def play_game_mcts(exploration_weight=1.0):
    board = TicTacToe4x4()
    current_player = 1
    while True:
        if current_player == 1:
            # Choose the move using the mcts_player function with the given exploration weight
            move = mcts_player(board, current_player, exploration_weight)
        else:
            move = random_player(board, current_player)
        board.make_move(move, current_player)
        if board.is_winner(current_player):
            return current_player
        if board.is_draw():
            return 0
        current_player = 3 - current_player
        
# Running the Experiment
results_mcts = []
for _ in range(100):
    results_mcts.append(play_game_mcts())

average_win_rates_mcts = []
for i in range(len(results_mcts)):
    last_fifty = results_mcts[max(0, i-49):i+1]
    # Append the average win rates over the last fifty games assigning 1 for a win and .5 for a draw
    average_win_rates_mcts.append((last_fifty.count(1) / len(last_fifty)) + (last_fifty.count(0)*0.5 / len(last_fifty)))

# Plotting the Results
plt.figure(figsize=(12, 6))
plt.plot(average_win_rates_mcts, marker='o', linestyle='-', color='blue', label='Player X using MCTS')
plt.xlabel('Game Number')
plt.ylabel('Average Win Rate (Last 50 Games)')
plt.title('Average Win Rate of Player X (using MCTS) Over the Last 50 Games')
plt.grid(True)
plt.legend()
plt.show()

[Figure: average win rate of Player X (using MCTS) over the last 50 games]

The MCTS player substantially outperforms the random player: its average win rate (counting a draw as 0.5) settles around 55-65%, more than double the random baseline of roughly 25%.

# Perform some experiments to find the optimal value of the exploration_weight in best_child
# Exploration for Optimal Exploration Weight
exploration_weights = np.linspace(0.1, 2, 20)  # Test values from 0.1 to 2, 20 points in total
win_rates_for_weights = []

# For each weight in exploration_weights
#   initialize results_for_weight to be empty
#   For each of 100 games
#      call MCTS with the current exploration weight
#      append the results of play_game_mcts to the current results_for_weight
#   calculate the win rate across all results_for_weight, assigning 1 for a win and .5 for a draw
#   Append the win rate for the current weight to win_rates_for_weights
for weight in exploration_weights:
    results_for_weight = []
    print(weight)  # printed once per weight to confirm which value is being tested
    for _ in range(100):
        # Pass the weight through to the MCTS player; assigning MCTS.exploration_weight as a
        # class attribute has no effect, because __init__ sets an instance attribute that shadows it
        results_for_weight.append(play_game_mcts(exploration_weight=weight))
    win_rate_for_weight = (results_for_weight.count(1) + (results_for_weight.count(0) * 0.5)) / len(results_for_weight)
    win_rates_for_weights.append(win_rate_for_weight)

# Plotting the results
plt.figure(figsize=(12, 6))
plt.plot(exploration_weights, win_rates_for_weights, marker='o', linestyle='-')
plt.xlabel('Exploration Weight')
plt.ylabel('Win Rate for Player X (MCTS)')
plt.title('Win Rate of Player X using MCTS for Various Exploration Weights')
plt.grid(True)
plt.show()

optimal_weight = exploration_weights[np.argmax(win_rates_for_weights)]
print(optimal_weight)

[Figure: win rate of Player X using MCTS for various exploration weights]

Optimal weight printed: 0.1

Experiments with varying the exploration constant c showed that c = 0.1 returned the highest win rate for Player 1.

This result was followed by c = 1.1, which makes more theoretical sense given the UCT/UCB1 formula, where values around sqrt(2) ≈ 1.41 are the usual textbook choice.

A finer sweep of weights (to two decimal places) might yield a better-tuned value.
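
Such a follow-up could look like the sketch below (my own illustration; it has not been run, so no optimum is reported here):

# Hypothetical finer grid around the best coarse value of 0.1
fine_weights = np.round(np.arange(0.05, 0.31, 0.01), 2)
fine_win_rates = []
for weight in fine_weights:
    games = [play_game_mcts(exploration_weight=weight) for _ in range(100)]
    fine_win_rates.append((games.count(1) + 0.5 * games.count(0)) / len(games))
print(fine_weights[np.argmax(fine_win_rates)])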