""" Go board game """ import math import random from time import time SIZE = 9 GAMES = 200 KOMI = 7.5 EMPTY, WHITE, BLACK = 0, 1, 2 SHOW = {EMPTY: '.', WHITE: 'o', BLACK: 'x'} PASS = -1 MAXMOVES = SIZE * SIZE * 3 TIMESTAMP = 0 MOVES = 0 def to_pos(x, y): return y * SIZE + x def to_xy(pos): y, x = divmod(pos, SIZE) return x, y @dataclass(init=False) class Square[Board]: board: Board pos: int timestamp: int removestamp: int zobrist_strings: List[int] neighbours: Optional[List[Square[Board]]] color: int used: bool reference: Optional[Square[Board]] ledges: int temp_ledges: int def __init__(self, board, pos): self.board = board self.pos = pos self.timestamp = TIMESTAMP self.removestamp = TIMESTAMP self.zobrist_strings = [random.randrange(9223372036854775807) for i in range(3)] self.neighbours = None self.color = EMPTY self.used = False self.reference = None self.ledges = 0 self.temp_ledges = 0 def set_neighbours(self): x, y = self.pos % SIZE, self.pos // SIZE self.neighbours = [] for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]: newx, newy = x + dx, y + dy if 0 <= newx < SIZE and 0 <= newy < SIZE: self.neighbours.append(self.board.squares[to_pos(newx, newy)]) def move(self, color): global TIMESTAMP, MOVES TIMESTAMP += 1 MOVES += 1 self.board.zobrist.update(self, color) self.color = color self.reference = self self.ledges = 0 self.used = True for neighbour in self.neighbours: neighcolor = neighbour.color if neighcolor == EMPTY: self.ledges += 1 else: neighbour_ref = neighbour.find(update=True) if neighcolor == color: if neighbour_ref.reference.pos != self.pos: self.ledges += neighbour_ref.ledges neighbour_ref.reference = self self.ledges -= 1 else: neighbour_ref.ledges -= 1 if neighbour_ref.ledges == 0: neighbour.remove(neighbour_ref) self.board.zobrist.add() def remove(self, reference, update=True): self.board.zobrist.update(self, EMPTY) self.removestamp = TIMESTAMP if update: self.color = EMPTY self.board.emptyset.add(self.pos) # if color == BLACK: # self.board.black_dead += 1 # else: # self.board.white_dead += 1 for neighbour in self.neighbours: if neighbour.color != EMPTY and neighbour.removestamp != TIMESTAMP: neighbour_ref = neighbour.find(update) if neighbour_ref.pos == reference.pos: neighbour.remove(reference, update) else: if update: neighbour_ref.ledges += 1 def find(self, update=False): reference = self.reference if reference.pos != self.pos: reference = reference.find(update) if update: self.reference = reference return reference def __repr__(self): return repr(to_xy(self.pos)) class EmptySet[Board]: board: Board empties: List[int] empty_pos: List[int] def __init__(self, board): self.board = board self.empties = list(range(SIZE * SIZE)) self.empty_pos = list(range(SIZE * SIZE)) def random_choice(self): choices = len(self.empties) while choices: i = int(random.random() * choices) pos = self.empties[i] if self.board.useful(pos): return pos choices -= 1 self.set(i, self.empties[choices]) self.set(choices, pos) return PASS def add(self, pos): self.empty_pos[pos] = len(self.empties) self.empties.append(pos) def remove(self, pos): self.set(self.empty_pos[pos], self.empties[len(self.empties) - 1]) self.empties.pop() def set(self, i, pos): self.empties[i] = pos self.empty_pos[pos] = i class ZobristHash[Board]: board: Board hash_set: Set[int] hash: int def __init__(self, board): self.board = board self.hash_set = set() self.hash = 0 for square in self.board.squares: self.hash ^= square.zobrist_strings[EMPTY] self.hash_set.clear() self.hash_set.add(self.hash) def update(self, square, color): self.hash ^= square.zobrist_strings[square.color] self.hash ^= square.zobrist_strings[color] def add(self): self.hash_set.add(self.hash) def dupe(self): return self.hash in self.hash_set class Board: squares: List[Square[Board]] emptyset: EmptySet[Board] zobrist: ZobristHash[Board] color: int finished: bool lastmove: int history: List[int] white_dead: int black_dead: int def __init__(self): self.squares = [Square(self, pos) for pos in range(SIZE * SIZE)] for square in self.squares: square.set_neighbours() self.reset() def reset(self): for square in self.squares: square.color = EMPTY square.used = False self.emptyset = EmptySet(self) self.zobrist = ZobristHash(self) self.color = BLACK self.finished = False self.lastmove = -2 self.history = [] self.white_dead = 0 self.black_dead = 0 def move(self, pos): square = self.squares[pos] if pos != PASS: square.move(self.color) self.emptyset.remove(square.pos) elif self.lastmove == PASS: self.finished = True if self.color == BLACK: self.color = WHITE else: self.color = BLACK self.lastmove = pos self.history.append(pos) def random_move(self): return self.emptyset.random_choice() def useful_fast(self, square): if not square.used: for neighbour in square.neighbours: if neighbour.color == EMPTY: return True return False def useful(self, pos): global TIMESTAMP TIMESTAMP += 1 square = self.squares[pos] if self.useful_fast(square): return True old_hash = self.zobrist.hash self.zobrist.update(square, self.color) empties = opps = weak_opps = neighs = weak_neighs = 0 for neighbour in square.neighbours: neighcolor = neighbour.color if neighcolor == EMPTY: empties += 1 continue neighbour_ref = neighbour.find() if neighbour_ref.timestamp != TIMESTAMP: if neighcolor == self.color: neighs += 1 else: opps += 1 neighbour_ref.timestamp = TIMESTAMP neighbour_ref.temp_ledges = neighbour_ref.ledges neighbour_ref.temp_ledges -= 1 if neighbour_ref.temp_ledges == 0: if neighcolor == self.color: weak_neighs += 1 else: weak_opps += 1 neighbour_ref.remove(neighbour_ref, update=False) dupe = self.zobrist.dupe() self.zobrist.hash = old_hash strong_neighs = neighs - weak_neighs strong_opps = opps - weak_opps return not dupe and \ (empties or weak_opps or (strong_neighs and (strong_opps or weak_neighs))) def useful_moves(self): return [pos for pos in self.emptyset.empties if self.useful(pos)] def replay(self, history): for pos in history: self.move(pos) def score(self, color): if color == WHITE: count = KOMI + self.black_dead else: count = float(self.white_dead) for square in self.squares: squarecolor = square.color if squarecolor == color: count += 1 elif squarecolor == EMPTY: surround = 0 for neighbour in square.neighbours: if neighbour.color == color: surround += 1 if surround == len(square.neighbours): count += 1 return count def check(self): for square in self.squares: if square.color == EMPTY: continue members1 = set([square]) changed = True while changed: changed = False for member in members1.copy(): for neighbour in member.neighbours: if neighbour.color == square.color and neighbour not in members1: changed = True members1.add(neighbour) ledges1 = 0 for member in members1: for neighbour in member.neighbours: if neighbour.color == EMPTY: ledges1 += 1 root = square.find() # print 'members1', square, root, members1 # print 'ledges1', square, ledges1 members2 = set() for square2 in self.squares: if square2.color != EMPTY and square2.find() == root: members2.add(square2) ledges2 = root.ledges # print 'members2', square, root, members1 # print 'ledges2', square, ledges2 assert members1 == members2 assert ledges1 == ledges2 set(self.emptyset.empties) empties2 = set() for square in self.squares: if square.color == EMPTY: empties2.add(square.pos) def __repr__(self): result = [] for y in range(SIZE): start = to_pos(0, y) result.append(''.join( [SHOW[square.color] + ' ' for square in self.squares[start:start + SIZE]])) return '\n'.join(result) class UCTNode: bestchild: Optional[UCTNode] pos: int wins: int losses: int pos_child: List[Optional[UCTNode]] parent: Optional[UCTNode] unexplored: List[int] def __init__(self): self.bestchild = None self.pos = -1 self.wins = 0 self.losses = 0 self.pos_child = [None for x in range(SIZE * SIZE)] self.parent = None self.unexplored = [] def play(self, board): """ uct tree search """ color = board.color node = self path = [node] while True: pos = node.select(board) if pos == PASS: break board.move(pos) child = node.pos_child[pos] if not child: child = node.pos_child[pos] = UCTNode() child.unexplored = board.useful_moves() child.pos = pos child.parent = node path.append(child) break path.append(child) node = child self.random_playout(board) self.update_path(board, color, path) def select(self, board): """ select move; unexplored children first, then according to uct value """ if self.unexplored: i = random.randrange(len(self.unexplored)) pos = self.unexplored[i] self.unexplored[i] = self.unexplored[len(self.unexplored) - 1] self.unexplored.pop() return pos elif self.bestchild: return self.bestchild.pos else: return PASS def random_playout(self, board): """ random play until both players pass """ for x in range(MAXMOVES): # XXX while not self.finished? if board.finished: break board.move(board.random_move()) def update_path(self, board, color, path): """ update win/loss count along path """ wins = board.score(BLACK) >= board.score(WHITE) for node in path: if color == BLACK: color = WHITE else: color = BLACK if wins == (color == BLACK): node.wins += 1 else: node.losses += 1 if node.parent: node.parent.bestchild = node.parent.best_child() def score(self): winrate = self.wins / float(self.wins + self.losses) parentvisits = self.parent.wins + self.parent.losses if not parentvisits: return winrate nodevisits = self.wins + self.losses return winrate + math.sqrt((math.log(parentvisits)) / (5 * nodevisits)) def best_child(self): maxscore = -1. maxchild = None for child in self.pos_child: if child and child.score() > maxscore: maxchild = child maxscore = child.score() return maxchild def best_visited(self): maxvisits = -1 maxchild = None for child in self.pos_child: # if child: # print to_xy(child.pos), child.wins, child.losses, child.score() if child and (child.wins + child.losses) > maxvisits: maxvisits, maxchild = (child.wins + child.losses), child return maxchild # def user_move(board): # while True: # text = input('?').strip() # if text == 'p': # return PASS # if text == 'q': # raise EOFError # try: # x, y = [int(i) for i in text.split()] # except ValueError: # continue # if not (0 <= x < SIZE and 0 <= y < SIZE): # continue # pos = to_pos(x, y) # if board.useful(pos): # return pos def computer_move(board): pos = board.random_move() if pos == PASS: return PASS tree = UCTNode() tree.unexplored = board.useful_moves() nboard = Board() for game in range(GAMES): node = tree nboard.reset() nboard.replay(board.history) node.play(nboard) return tree.best_visited().pos def versus_cpu(): for i in range(100): random.seed(i) board = Board() computer_move(board) if __name__ == "__main__": t0 = time() versus_cpu() t1 = time() print(t1 - t0)