# PyTicTacToe / reinforcement / AdaptiveTTT.py
# Initial commit by sato, 1 Mar 2022.
import numpy as np
from numba import jit
from copy import deepcopy


def checkReach(board, Is_me=True):
    """Find the empty cell that would complete a line on the next placement.

    Cell values follow the game's encoding: 0 is empty, positive values are
    piece ages (1 = oldest, 3 = newest) of the side being examined, and
    negative values are the other side's pieces.

    A line counts as a "reach" in two situations:

    * late game - the line holds a 2 and a 3; placing a new piece ages them
      to 1 and 2, so all three cells stay occupied;
    * opening - the line holds a 1 and a 2 and no 3 exists anywhere on the
      board yet, so nothing vanishes when the third piece is placed.

    Args:
        board: The 9 cell values (ndarray, list or tuple). Never mutated.
        Is_me: True to look for a winning cell for the side whose pieces are
            positive; False to negate the board first and look for the cell
            the other side would win on (i.e. the cell to block).

    Returns:
        The index (a NumPy integer) of the empty cell completing the first
        matching line, or None when no line is one move from completion.
    """
    # np.array always copies, so the caller's board is safe, and it also
    # accepts plain lists/tuples (in-place `*= -1` only works on ndarrays).
    cells = np.array(board)
    if not Is_me:
        cells = -cells

    lines = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8], [0, 4, 8], [2, 4, 6]])

    # Whole-board property, so compute it once instead of once per line.
    newest_on_board = bool(np.any(cells == 3))

    for check_line in lines:
        line_state = cells[check_line]
        has_two = bool(np.any(line_state == 2))

        # Late game: pieces have started to vanish.
        late_reach = has_two and bool(np.any(line_state == 3))
        # Opening: a reach made before any third piece was placed.
        early_reach = has_two and bool(np.any(line_state == 1)) and not newest_on_board

        if late_reach or early_reach:
            # The smallest value in the line is the candidate gap; it only
            # counts if it is truly empty (a negative value is an enemy piece).
            index = np.argmin(line_state)
            if line_state[index] == 0:
                return check_line[index]

    return None


def SimpleCPU(board):
    """Choose a move for the scripted opponent: win, else block, else random.

    Args:
        board: The 9 cell values from the mover's perspective (own pieces
            positive, opponent's negative, 0 empty).

    Returns:
        The chosen cell index: the winning cell if checkReach finds one,
        otherwise the cell that blocks the opponent's reach, otherwise a
        uniformly random empty cell.

    Raises:
        ValueError: If no winning/blocking move exists and the board has no
            empty cell (the previous rejection loop would never terminate).
    """
    win_pos = checkReach(board, Is_me=True)
    if win_pos is not None:
        return win_pos

    defensive_pos = checkReach(board, Is_me=False)
    if defensive_pos is not None:
        return defensive_pos

    # Sample directly from the empty cells. The former randint(0, 8) loop
    # could never pick cell 8 because randint's upper bound is exclusive.
    empty_cells = np.flatnonzero(np.asarray(board) == 0)
    if empty_cells.size == 0:
        raise ValueError("no empty cell left on the board")
    return int(np.random.choice(empty_cells))


def RandTurn(board):
    """Return the index of a uniformly random empty cell.

    Args:
        board: The 9 cell values; a cell is empty when its value is 0.

    Returns:
        An int index into ``board`` whose cell is empty.

    Raises:
        ValueError: If the board has no empty cell (the previous rejection
            loop would never terminate in that case).
    """
    # Sample directly from the empty cells. The former randint(0, 8) loop
    # could never pick cell 8 because randint's upper bound is exclusive,
    # and it spun forever when cell 8 was the only free one.
    empty_cells = np.flatnonzero(np.asarray(board) == 0)
    if empty_cells.size == 0:
        raise ValueError("no empty cell left on the board")
    return int(np.random.choice(empty_cells))


# Numeric id of each side, numbered in declaration order:
# "None" -> 0, "First" -> 1, "Second" -> 2.
PLAYER = {
    label: number
    for number, label in enumerate(("None", "First", "Second"))
}

# Game status codes; each value is simply the lower-cased key
# (e.g. "OUT_OF_RANGE" -> "out_of_range").
JUDGE = {
    status: status.lower()
    for status in ("None", "OUT_OF_RANGE", "OVERLAP", "WIN", "DRAW")
}


class TicTacToe:
    """Rules engine for a tic-tac-toe variant in which old pieces vanish.

    Board encoding (a length-9 integer array, row-major):
      * 0 is an empty cell;
      * a positive value is the age of a piece owned by the side about to
        move (1 = oldest, REMAIN_PIECES = newest);
      * a negative value is a piece of the other side.

    After every move that does not end the game the whole board is negated,
    so the side to move always sees its own pieces as positive.
    """

    def __init__(self, turn_limit = 100):
        """Create a game that is declared a draw once Turn reaches ``turn_limit``."""
        self.BOARD_ROWS = 3
        self.BOARD_COLS = 3
        self.BOARD_SIZE = 9
        self.REMAIN_PIECES = 3
        self.PLAYERS = 2
        self.TURN_LIMIT = turn_limit
        self.NONE = 0
        self.LINES = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8], [0, 4, 8], [2, 4, 6]])

        # Start fully initialised: Player/Turn/Judge/LastSet used to be
        # undefined until Init() was called, so Set() on a fresh instance
        # raised AttributeError.
        self.Init()

    def Init(self):
        """Reset to the start of a game: empty board, first player, turn 1."""
        self.Player = PLAYER["First"]
        self.Turn = 1
        self.Judge = JUDGE["None"]
        self.LastSet = None
        self.Board = np.array([self.NONE] * self.BOARD_SIZE)

    def Set(self, pos):
        """Place a piece for the side to move and update the game status.

        The outcome is reported through ``self.Judge``:
          * OUT_OF_RANGE / OVERLAP - illegal move, board left untouched;
          * WIN - the mover completed a line (Player stays the winner);
          * DRAW - Turn reached TURN_LIMIT after the second player's move;
          * None - the game continues and the board has been negated for
            the next mover.

        Args:
            pos: Target cell index, 0 .. BOARD_SIZE - 1.
        """
        self.LastSet = pos

        if pos < 0 or pos >= self.BOARD_SIZE:
            self.Judge = JUDGE["OUT_OF_RANGE"]
            return

        if self.Board[pos] != 0:
            self.Judge = JUDGE["OVERLAP"]
            return

        if self.Turn > self.REMAIN_PIECES:
            # The mover already has REMAIN_PIECES pieces: age them all by
            # one (the oldest drops to 0 and disappears), then add the new
            # piece as the newest.
            self.Board[self.Board > 0] -= 1
            self.Board[pos] = self.REMAIN_PIECES
        else:
            # Opening turns: the piece's age equals the turn number.
            self.Board[pos] = self.Turn

        # Only the mover's (positive) pieces can have completed a line.
        if np.any(np.all(self.Board[self.LINES] > 0, axis=1)):
            self.Judge = JUDGE["WIN"]
            return

        if self.Player == PLAYER["First"]:
            self.Player = PLAYER["Second"]
        else:
            # A turn is one move by each side, so it advances after Second.
            self.Player = PLAYER["First"]
            self.Turn += 1
            if self.Turn >= self.TURN_LIMIT:
                # NOTE: on a draw the board is NOT negated even though
                # Player has already switched back to First.
                self.Judge = JUDGE["DRAW"]
                return
        self.FlipBoard()
        return

    def FlipBoard(self):
        """Negate the board in place, swapping which side is positive."""
        self.Board *= -1

    def GetBoard(self):
        """Return the live board array (not a copy)."""
        return self.Board


class TTTConsole:
    """Console front end that plays a caller-supplied agent against SimpleCPU.

    Args:
        Is_shown: When True, boards, moves and results are printed.
    """

    def __init__(self, Is_shown=False):
        self.PLAYER_STR = ["", "先手", "後手"]
        self.PLAYER_MARK = [" ", "○", "×"]

        self._ttt = TicTacToe()
        # Init() sets Player/Turn/Judge, which step() and ShowBoard() read;
        # calling it here makes the console usable before the first
        # init_TTTenv()/reset_randTTT_env().
        self._ttt.Init()
        self._showNumber = True
        self.Is_shown = Is_shown

    def ShowTitle(self):
        """Print the game title."""
        print("拡張〇×ゲーム")

    def step(self, agent_act):
        """Play the agent's move, then (if the game continues) the CPU's move.

        Args:
            agent_act: Callable taking the current board array and returning
                the cell index the agent wants to play.

        Returns:
            A ``(judge, player, board)`` tuple read from the game after the
            last move played; ``board`` is the live board array.
        """
        # Agent's move.
        self._play_turn(agent_act)

        # Opponent's move, only when the agent's move did not end the game.
        if self._ttt.Judge == JUDGE["None"]:
            self._play_turn(SimpleCPU)

        return self._ttt.Judge, self._ttt.Player, self._ttt.GetBoard()

    def _play_turn(self, policy):
        """Let ``policy`` pick a cell for the side to move, echoing if shown."""
        if self.Is_shown:
            print("")
            self.ShowBoard()
            print("{}の番".format(self.PlayerStr()))
        self._ttt.Set(policy(self._ttt.GetBoard()))
        if self.Is_shown:
            # Cells are shown 1-based to the user.
            print("--> {}".format(self._ttt.LastSet + 1))
            if self._ttt.Judge != JUDGE["None"]:
                self.ShowResult()

    def ShowBoard(self):
        """Print the turn header and the 3x3 board with marks and piece ages."""
        print("--- ターン", self._ttt.Turn, " ---")

        # The board is kept from the mover's point of view; undo that so the
        # first player's pieces are always the positive ones for display.
        flip = -1 if self._ttt.Player == PLAYER["Second"] else 1
        for row in range(self._ttt.BOARD_ROWS):
            print("    ", end="")
            for col in range(self._ttt.BOARD_COLS):
                pos = row * self._ttt.BOARD_COLS + col
                cell = self._ttt.Board[pos]
                owner = cell * flip
                if owner > 0:
                    pIdx = PLAYER["First"]
                elif owner < 0:
                    pIdx = PLAYER["Second"]
                else:
                    pIdx = PLAYER["None"]
                print(self.PLAYER_MARK[pIdx], end="")
                if self._showNumber:
                    # Show the piece's age next to its mark.
                    mark = "  " if cell == 0 else "{}".format(abs(cell))
                    print(mark, end="")
                if col < self._ttt.BOARD_COLS - 1:
                    print(" | ", end="")
            print("")
            if row < self._ttt.BOARD_ROWS - 1:
                if self._showNumber:
                    print("    -----+------+-----")
                else:
                    print("    ---+----+---")

    def ShowResult(self):
        """Print the final board followed by the outcome message."""
        msg = ""
        if self._ttt.Judge == JUDGE["WIN"]:
            msg = "{}の勝利".format(self.PlayerStr())
        elif self._ttt.Judge == JUDGE["DRAW"]:
            msg = "引き分け"
        elif self._ttt.Judge == JUDGE["OUT_OF_RANGE"]:
            msg = "{}の反則負け(範囲外)".format(self.PlayerStr())
        elif self._ttt.Judge == JUDGE["OVERLAP"]:
            msg = "{}の反則負け(重ね置き)".format(self.PlayerStr())

        print("")
        self.ShowBoard()
        print(msg)

    def PlayerStr(self):
        """Return the display name of the side to move."""
        return self.PLAYER_STR[self._ttt.Player]

    def init_TTTenv(self):
        """Reset the underlying game to its initial state."""
        self._ttt.Init()

    def reset_randTTT_env(self, max_init_turns=1):
        """Reset the game, then play a random number of random opening moves.

        The number of opening moves is drawn uniformly from
        0 .. ``max_init_turns`` (inclusive). The previous
        ``np.random.randint(0, 1)`` always produced 0 because the upper
        bound is exclusive, so no random move was ever played. The original
        author also wondered whether a range of 0-6 would be better; pass
        ``max_init_turns=6`` to try that.

        Args:
            max_init_turns: Largest number of random moves to pre-play.

        Returns:
            The resulting board as a tuple. If a random move ends the game,
            the opening stops early; callers can check the game's Judge.
        """
        self._ttt.Init()

        # + 1 because randint's upper bound is exclusive.
        init_turn = np.random.randint(0, max_init_turns + 1)

        for _ in range(init_turn):
            self._play_turn(RandTurn)
            if self._ttt.Judge != JUDGE["None"]:
                break

        state = tuple(self._ttt.GetBoard())
        return state