This is a snippet reconstructed from coursework for 95-891 Introduction to Artificial Intelligence at Carnegie Mellon University.
<aside> ⚠️
This work was NOT built from the ground up: the teaching team of 95-891 provided partially prepared skeleton code.
</aside>
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

class TicTacToe4x4:
    def __init__(self):
        self.board = np.zeros((4, 4), dtype=int)

    def reset(self):
        self.board = np.zeros((4, 4), dtype=int)

    def is_draw(self):
        return not np.any(self.board == 0)  # No zeros left on the board

    def is_winner(self, player):
        # Check rows, columns, and diagonals for a win
        for i in range(4):
            if np.all(self.board[i, :] == player) or np.all(self.board[:, i] == player):
                return True
        if np.all(np.diag(self.board) == player) or np.all(np.diag(np.fliplr(self.board)) == player):
            return True
        return False

    def available_moves(self):
        return np.argwhere(self.board == 0)

    def make_move(self, move, player):
        # Place the player's mark if the cell is empty; return whether the move was legal
        if self.board[move[0], move[1]] == 0:
            self.board[move[0], move[1]] = player
            return True
        return False

    def display(self):
        print(self.board)
def random_player(board, player):
    # Pick a uniformly random move from the empty cells
    moves = board.available_moves()
    return random.choice(moves)

def play_game():
    board = TicTacToe4x4()
    current_player = 1
    while True:
        move = random_player(board, current_player)
        board.make_move(move, current_player)
        if board.is_winner(current_player):
            return current_player
        if board.is_draw():
            return 0  # Draw
        current_player = 3 - current_player  # Switch between 1 and 2
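
As a quick sanity check before the batch experiment below, a single random game can be played out and its final position displayed (a minimal sketch, using only the classes and functions defined above):

# Sketch: play one random-vs-random game and show the final board
demo = TicTacToe4x4()
player = 1
while True:
    demo.make_move(random_player(demo, player), player)
    if demo.is_winner(player) or demo.is_draw():
        break
    player = 3 - player
demo.display()
print('Winner:', player if demo.is_winner(player) else 'draw')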
# Play the game 100 times and keep track of the results
results = []
for _ in range(100):
    results.append(play_game())

# Compute the first player's rolling win rate over the last fifty games
average_win_rates = []
for i in range(len(results)):
    last_fifty = results[max(0, i - 49):i + 1]
    average_win_rates.append(last_fifty.count(1) / len(last_fifty))

# Plotting the results
plt.figure(figsize=(12, 6))
plt.plot(average_win_rates, marker='o', linestyle='-')
plt.xlabel('Game Number')
plt.ylabel('Average Win Rate (Last Fifty Games)')
plt.title('Average Win Rate of the First Player Over the Last 50 Games')
plt.grid(True)
plt.show()

With both players moving at random, the first player's win rate stabilized at around 25%.
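
The win-rate plot only tracks player 1's wins; the full breakdown of wins, losses, and draws can be tallied from the same data (a minimal sketch, assuming the 100-game `results` list from the cell above is still in scope):

# Sketch: outcome breakdown for the random-vs-random games
from collections import Counter

outcome_counts = Counter(results)
print(f"Player 1 wins: {outcome_counts.get(1, 0)} / {len(results)}")
print(f"Player 2 wins: {outcome_counts.get(2, 0)} / {len(results)}")
print(f"Draws:         {outcome_counts.get(0, 0)} / {len(results)}")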
# MCTS Node Class
class MCTSNode:
    def __init__(self, game_state, parent=None, move=None):
        self.game_state = game_state
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.value = 0.0

    # Check if a node is fully expanded
    def is_fully_expanded(self):
        return len(self.children) == len(self.game_state.available_moves())

    # Select the best child based on the UCT (UCB1) formula. The 1e-7 terms avoid dividing by 0
    def best_child(self, exploration_weight=1.0):
        scores = [
            (child.value / (child.visits + 1e-7) +
             exploration_weight * np.sqrt(np.log(self.visits) / (child.visits + 1e-7)))
            for child in self.children
        ]
        return self.children[np.argmax(scores)]

    # Simulate a playthrough to the end of the game with both players making random moves.
    # The simulation runs on a copy of the board so the state stored in the tree is not mutated.
    # Rewards are from player 1's perspective: 1 for a P1 win, 0 for a P2 win, 0.5 for a draw.
    def rollout(self):
        current_board = TicTacToe4x4()
        current_board.board = self.game_state.board.copy()
        # self.move was made by the player whose mark sits on that square; the opponent moves next
        last_player = current_board.board[self.move[0], self.move[1]]
        current_player = 3 - last_player
        while True:
            # Check whether the player who just moved has won
            if current_board.is_winner(last_player):
                return 1 if last_player == 1 else 0
            if current_board.is_draw():
                return 0.5
            move = random_player(current_board, current_player)
            current_board.make_move(move, current_player)
            last_player, current_player = current_player, 3 - current_player

    # Backpropagate the result of a simulated playthrough through the ancestors of a node
    def backpropagate(self, value):
        self.visits += 1
        self.value += value
        if self.parent:
            self.parent.backpropagate(value)
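
For reference, the score that `best_child` maximizes is the standard UCT/UCB1 value:

$$\mathrm{UCT}(i) = \frac{v_i}{n_i} + c\,\sqrt{\frac{\ln N}{n_i}}$$

where $v_i$ is the accumulated value of child $i$, $n_i$ its visit count, $N$ the visit count of the parent, and $c$ the exploration weight (the symbols are just labels for the attributes stored on each node). The `1e-7` terms in the code are guards for the $n_i = 0$ case.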
# The MCTS class below completes the stub provided in the skeleton code (the parts to implement were marked by ***)
# MCTS Algorithm Class
class MCTS:
    # Initialize the exploration weight used during selection
    def __init__(self, exploration_weight=1.0):
        self.exploration_weight = exploration_weight

    # Search for the best move over n_simulations simulated playouts
    def search(self, initial_state, n_simulations=1000):
        # Initialize the root to the initial state
        root = MCTSNode(initial_state)
        # For each simulation (up to n_simulations)
        for _ in range(n_simulations):
            # Call select on the root node
            node = self._select(root)
            # If the node's game state is a draw or a win for the player who just moved,
            # this simulation is done: backpropagate the terminal value directly so the
            # statistics still update (node.game_state.board[node.move] holds that player's mark)
            if node.move is not None:
                last_player = node.game_state.board[node.move[0], node.move[1]]
                if node.game_state.is_winner(last_player):
                    node.backpropagate(1 if last_player == 1 else 0)
                    continue
            if node.game_state.is_draw():
                node.backpropagate(0.5)
                continue
            # Otherwise expand the node to add one of its children
            node = self._expand(node)
            # Set the reward to the result of playing out the game from the child node
            reward = node.rollout()
            # Backpropagate the reward from the child node; backpropagate already adds the
            # reward to node.value, so assigning it to node.value first would double-count it
            node.backpropagate(reward)
        # For the final move choice, pick the best child without the exploration bonus
        return root.best_child(exploration_weight=0).move

    # While the node is fully expanded, descend to its best child; stop at the first node
    # that still has untried moves (or at a terminal node with no children)
    def _select(self, node):
        while node.is_fully_expanded():
            if node.children:
                # Update the node to its best child, given the configured exploration weight
                node = node.best_child(self.exploration_weight)
            else:
                return node
        return node

    # Choose an untried move at random and add the resulting child node
    def _expand(self, node):
        untried_moves = (set(tuple(move) for move in node.game_state.available_moves())
                         - set(child.move for child in node.children))
        move = random.choice(list(untried_moves))
        child_game_state = TicTacToe4x4()
        child_game_state.board = node.game_state.board.copy()
        # The next player is the opponent of whoever made node.move; at the root, player 1 moves
        next_player = (3 - node.game_state.board[node.move[0], node.move[1]]) if node.move else 1
        child_game_state.make_move(move, next_player)
        child_node = MCTSNode(child_game_state, parent=node, move=move)
        node.children.append(child_node)
        return child_node
# Player Functions
def random_player(board, player):
    moves = board.available_moves()
    return random.choice(moves)

# The exploration_weight parameter is threaded through so the later experiment can vary it
def mcts_player(board, player, exploration_weight=1.0):
    mcts = MCTS(exploration_weight=exploration_weight)
    return mcts.search(board)
# Game Playing Loop with MCTS for Player X
def play_game_mcts(exploration_weight=1.0):
    board = TicTacToe4x4()
    current_player = 1
    while True:
        if current_player == 1:
            # Choose the move using the mcts_player function
            move = mcts_player(board, current_player, exploration_weight)
        else:
            move = random_player(board, current_player)
        board.make_move(move, current_player)
        if board.is_winner(current_player):
            return current_player
        if board.is_draw():
            return 0
        current_player = 3 - current_player
# Running the Experiment
results_mcts = []
for _ in range(100):
    results_mcts.append(play_game_mcts())

average_win_rates_mcts = []
for i in range(len(results_mcts)):
    last_fifty = results_mcts[max(0, i - 49):i + 1]
    # Append the average win rate over the last fifty games, assigning 1 for a win and 0.5 for a draw
    average_win_rates_mcts.append((last_fifty.count(1) + 0.5 * last_fifty.count(0)) / len(last_fifty))

# Plotting the Results
plt.figure(figsize=(12, 6))
plt.plot(average_win_rates_mcts, marker='o', linestyle='-', color='blue', label='Player X using MCTS')
plt.xlabel('Game Number')
plt.ylabel('Average Win Rate (Last 50 Games)')
plt.title('Average Win Rate of Player X (using MCTS) Over the Last 50 Games')
plt.grid(True)
plt.legend()
plt.show()

The MCTS player substantially outperforms the random player, more than doubling the win rate to roughly 55~65%.
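
To gauge how much of that 55~65% band is sampling noise, a rough normal-approximation interval can be attached to the 100-game score (a sketch, not part of the original assignment; it assumes `results_mcts` is in scope and scores draws as 0.5, matching the plot above):

# Sketch: mean score for Player X with an approximate 95% confidence interval
scores = [1.0 if r == 1 else 0.5 if r == 0 else 0.0 for r in results_mcts]
mean = np.mean(scores)
sem = np.std(scores, ddof=1) / np.sqrt(len(scores))  # standard error of the mean
print(f"Mean score: {mean:.3f}, 95% CI ~ [{mean - 1.96 * sem:.3f}, {mean + 1.96 * sem:.3f}]")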
# Perform some experiments to find the optimal value of the exploration_weight in best_child
# Exploration for Optimal Exploration Weight
exploration_weights = np.linspace(0.1, 2, 20)  # Test values from 0.1 to 2, 20 points in total
win_rates_for_weights = []
# For each weight in exploration_weights:
#   initialize results_for_weight to be empty
#   for each of 100 games:
#     call MCTS with the current exploration weight
#     append the result of play_game_mcts to results_for_weight
#   calculate the win rate across all results_for_weight, assigning 1 for a win and 0.5 for a draw
#   append the win rate for the current weight to win_rates_for_weights
for weight in exploration_weights:
    results_for_weight = []
    for _ in range(100):
        # Pass the weight explicitly: merely setting the class attribute MCTS.exploration_weight
        # would be shadowed by the instance attribute that __init__ sets on every new MCTS
        results_for_weight.append(play_game_mcts(exploration_weight=weight))
    win_rate_for_weight = (results_for_weight.count(1) + 0.5 * results_for_weight.count(0)) / len(results_for_weight)
    win_rates_for_weights.append(win_rate_for_weight)
# Plotting the results
plt.figure(figsize=(12, 6))
plt.plot(exploration_weights, win_rates_for_weights, marker='o', linestyle='-')
plt.xlabel('Exploration Weight')
plt.ylabel('Win Rate for Player X (MCTS)')
plt.title('Win Rate of Player X using MCTS for Various Exploration Weights')
plt.grid(True)
plt.show()
optimal_weight = exploration_weights[np.argmax(win_rates_for_weights)]
print(optimal_weight)

0.1
Experiments varying the exploration weight c showed that c = 0.1 returned the highest win rate for player 1. The runner-up was c = 1.1, which makes more sense theoretically, since UCB1 analyses suggest a constant near sqrt(2) ≈ 1.41. A finer sweep, with weights specified to two decimal places, might yield a better-tuned value.
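
Such a finer sweep could look like the following (a sketch under the `play_game_mcts(exploration_weight=...)` signature used above; the 0.05 grid and 100-game budget are arbitrary choices, and at 1000 simulations per move this run is slow):

# Sketch: two-decimal-resolution sweep over the promising range of c
fine_weights = np.round(np.arange(0.05, 1.55, 0.05), 2)
fine_win_rates = []
for weight in fine_weights:
    fine_results = [play_game_mcts(exploration_weight=weight) for _ in range(100)]
    fine_win_rates.append((fine_results.count(1) + 0.5 * fine_results.count(0)) / len(fine_results))
print(fine_weights[np.argmax(fine_win_rates)])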