Newer
Older
PyTicTacToe / reinforcement / QLearningUtils.py
@sato sato on 1 Mar 2022 1 KB 最初のコミット
# Lookup table from a symbolic judge name to the lowercase string code that
# ReferRewrads and decodeResult compare against. The key "None" is mixed-case
# unlike the others; it is kept as-is because callers index by it.
JUDGE = {
    "None": "none",  # ReferRewrads treats this code as "episode continues"
    "OUT_OF_RANGE": "out_of_range",  # presumably a move outside the board
    "OVERLAP": "overlap",  # presumably a move onto an occupied cell
    "WIN": "win",  # the mover won
    "DRAW": "draw",  # the game ended without a winner
}


def ReferRewrads(judge, is_me, board):
    """Translate a judge code into a ``(state, reward, is_end_episode)`` triple.

    Args:
        judge: Judge code string: ``"none"``, ``"out_of_range"``,
            ``"overlap"``, ``"win"`` or ``"draw"``.
        is_me: Truthy to use the first reward table, falsy to use the second.
            Presumably this marks whether the learning agent made the move;
            confirm against the caller.
        board: Iterable describing the board; it is frozen into a tuple and
            returned as the state.

    Returns:
        A tuple ``(state, reward, is_end_episode)``. For ``"none"`` the reward
        is 0 and the episode continues; every other known code ends the
        episode with the reward from the selected table.

    Raises:
        KeyError: If ``judge`` is neither ``"none"`` nor a key of the reward
            table.
    """
    snapshot = tuple(board)

    # A "none" judgement carries no reward and keeps the episode running.
    if judge == "none":
        return snapshot, 0, False

    if is_me:
        rewards = {
            "out_of_range": -100,
            "overlap": -100,
            "win": 100,
            "draw": -1,
        }
    else:
        rewards = {
            "out_of_range": 0,
            "overlap": 0,
            "win": -100,
            "draw": -1,
        }

    # Every judgement other than "none" terminates the episode.
    return snapshot, rewards[judge], True


def decodeResult(judge, is_me):
    """Collapse a judge code and the ``is_me`` flag into a result label.

    Args:
        judge: Judge code string such as ``"win"``, ``"draw"`` or
            ``"overlap"``.
        is_me: Truthy or falsy flag; ``"win"`` and ``"overlap"`` are only
            reported when it is truthy.

    Returns:
        ``"draw"`` for a draw regardless of ``is_me``; ``"win"`` or
        ``"overlap"`` when ``is_me`` is truthy and ``judge`` matches; and
        ``"lose"`` for every other combination.
    """
    # A draw is reported the same way whatever is_me is.
    if judge == "draw":
        return "draw"

    # With a falsy is_me, every non-draw judgement is reported as a loss.
    if not is_me:
        return "lose"

    if judge == "win":
        return "win"
    if judge == "overlap":
        return "overlap"

    # Any remaining code (e.g. "out_of_range", "none") falls through to "lose".
    return "lose"