PyTicTacToe/ga.py at master - sato.yukiya/PyTicTacToe

Fork: 0
sato.yukiya / PyTicTacToe
Find file
Newer
Older
PyTicTacToe / ga.py
sato on 1 Mar 2022 6 KB 最初のコミット
Raw Blame History
import numpy as np
import copy
import random
import time
from ConsolePlay import TTTConsole
from numba import jit

# Symbolic names for the outcome strings of a game.
# NOTE(review): nothing in this module reads JUDGE; the string values are
# compared literally ("win", "draw", "overlap") further down. Presumably this
# mirrors the values returned by TTTConsole.Run() -- confirm against
# ConsolePlay.
JUDGE = {
    "None": "none",
    "OUT_OF_RANGE": "out_of_range",
    "OVERLAP": "overlap",
    "WIN": "win",
    "DRAW": "draw",
}

# --- Network layout -------------------------------------------------------
# 9 board cells, each encoded as a 6-element slice of the input vector.
input_vec_length = 54
# Width of the single hidden layer.
w1_length = 64
# One output score per board cell.
out_length = 9

# A gene is both weight matrices flattened and concatenated:
# (input -> hidden) followed by (hidden -> output).
gene_length = input_vec_length * w1_length + w1_length * out_length

# --- Evaluation -----------------------------------------------------------
# Games played per individual per generation.
battle_num = 100

# --- Genetic algorithm ----------------------------------------------------
# Individuals kept in every generation.
population = 50
# New individuals created by crossover + mutation in every generation.
offspring_n = 24
# Number of generations to run.
generation = 100000
# Per-gene probability used by mutation().
mutation_rate = 0.01
# Progress is printed every `print_freq` generations.
print_freq = 100


def init():
    """Create the initial population.

    Returns:
        A list of `population` independent 1-D arrays, each holding
        `gene_length` values drawn uniformly from [0, 1).
    """
    individuals = []
    for _ in range(population):
        individuals.append(np.random.rand(gene_length))
    return individuals


def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.

    Works element-wise on NumPy arrays as well as on scalars.

    The previous implementation used ``exp(x)`` (without the minus sign),
    which computes ``sigmoid(-x)``: a decreasing function that maps large
    positive inputs to 0 instead of 1.

    NOTE(review): nnTurn_module2 does not call this helper; it inlines its own
    activation, so this fix does not change how genes are evaluated.
    """
    return 1 / (1 + np.exp(-x))


def RandTurn(board):
    """Pick a uniformly random empty cell of *board*.

    Args:
        board: Indexable sequence of 9 cells; a cell equal to 0 is empty.

    Returns:
        The index (0-8) of an empty cell.

    Raises:
        ValueError: If the board has no empty cell.

    The previous implementation drew ``np.random.randint(0, 8)``, whose upper
    bound is exclusive, so cell 8 could never be chosen and the rejection loop
    never terminated when cell 8 was the only free cell (or when the board
    was full).
    """
    # Sample directly from the free cells instead of rejection sampling, so
    # the call always terminates after a single draw.
    empty_cells = [pos for pos in range(9) if board[pos] == 0]
    if not empty_cells:
        raise ValueError("RandTurn called on a board with no empty cell")
    return empty_cells[np.random.randint(len(empty_cells))]


def calc_penalty(judge, is_me):
    """Return the penalty for a finished game.

    Args:
        judge: Outcome string; one of "overlap", "win" or "draw".
        is_me: True when the player reported together with `judge` is the
            evaluated network, False when it is the opponent.
            NOTE(review): presumably the reported player is the one whose
            move ended the game -- confirm against TTTConsole.Run().

    Returns:
        The integer penalty; lower is better for the network.

    Raises:
        KeyError: If `judge` is not one of the three outcomes listed above.
    """
    own_penalties = {"overlap": 10000, "win": 0, "draw": 100}
    opponent_penalties = {"overlap": 0, "win": 1000, "draw": 100}

    table = own_penalties if is_me else opponent_penalties
    return table[judge]


@jit
def nnTurn_module1(j, board_j, part_of_vec, input_vec):
    """Encode one board cell into its 6-element slice of the input vector.

    Args:
        j: Cell index; selects the slice ``input_vec[6*j : 6*(j+1)]``.
        board_j: Value of cell `j`. A negative value sets slot
            ``board_j + 3``, a positive value sets slot ``board_j + 2``,
            and 0 sets nothing.
            NOTE(review): this presumably expects values in -3..3, so that
            negatives land in slots 0-2 and positives in slots 3-5 -- confirm
            against the board representation in ConsolePlay.
        part_of_vec: Length-6 scratch array; modified in place.
        input_vec: Full input vector; modified in place.

    Returns:
        `input_vec`, after the slice for cell `j` has been overwritten.
    """
    if board_j < 0:
        slot = board_j + 3
        part_of_vec[slot] = 1.0
    elif board_j > 0:
        slot = board_j + 2
        part_of_vec[slot] = 1.0

    first = int(6 * j)
    last = int(6 * (j + 1))
    input_vec[first:last] = part_of_vec
    return input_vec

@jit
def nnTurn_module2(input_vec, w1, w2):
    """Run the two-layer network and return the index of the largest output.

    Args:
        input_vec: 1-D encoded board; reshaped to a single row.
        w1: Weight matrix applied to the input row.
        w2: Weight matrix applied to the hidden activations.

    Returns:
        ``np.argmax`` of the output scores, used by the caller as the move.
    """
    row = input_vec.reshape((1, -1))

    # NOTE(review): the activation is 1 / (1 + exp(+x)), i.e. the logistic
    # function of -x. It is reproduced unchanged here because altering it
    # would change the behaviour of genes that were already evolved/saved.
    hidden = 1 / (1 + np.exp(np.dot(row, w1)))

    scores = np.dot(hidden, w2)
    return np.argmax(scores)


def eval(gene_list):
    """Play every individual against the random player and score it.

    Each gene is split into the two weight matrices of the network. The
    network then plays `battle_num` games against RandTurn: the first half
    as player 1, the remaining games as player 2.

    NOTE: the name shadows the built-in ``eval``; it is kept because callers
    in this module use it.

    Args:
        gene_list: Iterable of 1-D arrays of length `gene_length`.

    Returns:
        A tuple ``(penalty_list, win_lose_draw_list)`` with one entry per
        gene, in input order: the summed penalty over all games, and a dict
        counting "win", "lose", "draw" and "overlap" results.
    """
    penalty_list = []
    win_lose_draw_list = []
    w1_size = input_vec_length * w1_length

    for gene in gene_list:
        w1 = gene[:w1_size].reshape(w1_length, input_vec_length).T
        w2 = gene[w1_size:].reshape(out_length, w1_length).T

        def nnTurn(board):
            """Choose a move for `board` with the network of the current gene."""
            # `np.float` was removed in NumPy 1.24 and raises AttributeError
            # there; np.float64 is the dtype that alias used to resolve to.
            input_vec = np.zeros(input_vec_length, dtype=np.float64)
            for j in range(9):
                part_of_vec = np.zeros(6, dtype=np.float64)
                input_vec = nnTurn_module1(j, board[j], part_of_vec, input_vec)

            return nnTurn_module2(input_vec, w1, w2)

        cur_penalty = 0
        win_lose_draw = {
            "win": 0,
            "lose": 0,
            "draw": 0,
            "overlap": 0,
        }

        for i in range(battle_num):
            # First half of the games: the network moves first (player 1);
            # second half: the random player moves first.
            if i < battle_num // 2:
                play = TTTConsole(nnTurn, RandTurn)
                my_number = 1
            else:
                play = TTTConsole(RandTurn, nnTurn)
                my_number = 2

            judge, player = play.Run()
            is_me = player == my_number
            cur_penalty += calc_penalty(judge, is_me)

            if judge == "win" and is_me:
                win_lose_draw["win"] += 1
            elif judge == "draw":
                win_lose_draw["draw"] += 1
            elif judge == "overlap" and is_me:
                win_lose_draw["overlap"] += 1
            else:
                # Every remaining outcome is counted as a loss.
                win_lose_draw["lose"] += 1

        penalty_list.append(cur_penalty)
        win_lose_draw_list.append(win_lose_draw)

    return penalty_list, win_lose_draw_list


@jit
def choice_parents_module(population):
    """Draw two independent random individual indices.

    Args:
        population: Number of individuals to choose from.

    Returns:
        A tuple of two indices, each uniform over ``0 .. population - 1``
        (the two may be equal).

    The upper bound of ``np.random.randint`` is exclusive. The previous
    ``randint(0, population - 1)`` therefore never returned the last index,
    so the last individual could never become a parent.
    """
    parent_1_index = np.random.randint(0, population)
    parent_2_index = np.random.randint(0, population)
    return parent_1_index, parent_2_index


def choice_parents(gene_list, fitness):
    """Select one parent by a two-way tournament.

    Two candidate indices are drawn at random and the candidate with the
    smaller `fitness` value wins; on a tie the first candidate wins.

    Args:
        gene_list: Sequence of individuals.
        fitness: Per-individual score aligned with `gene_list`; smaller is
            better (callers in this module pass the penalty list).

    Returns:
        The winning element of `gene_list` (not a copy).
    """
    first, second = choice_parents_module(population)
    winner = second if fitness[first] > fitness[second] else first
    return gene_list[winner]


@jit
def crossover_module(parent_1, parent_2):
    """Two-point crossover of two genes.

    A segment ``[start, stop)`` is drawn at random and swapped between
    copies of the parents; the parents themselves are not modified. Both
    cut points may coincide, in which case the children are plain copies.

    Args:
        parent_1: First parent gene (1-D array).
        parent_2: Second parent gene (1-D array).

    Returns:
        A tuple ``(offspring_1, offspring_2)``: `parent_1` with the segment
        taken from `parent_2`, and vice versa.
    """
    # random.randint includes both end points.
    start = random.randint(0, gene_length)
    stop = random.randint(start, gene_length)

    child_a = parent_1.copy()
    child_a[start:stop] = parent_2[start:stop]

    child_b = parent_2.copy()
    child_b[start:stop] = parent_1[start:stop]

    return child_a, child_b


def crossover(gene_list, penalty_list):
    """Select two parents by tournament and cross them over.

    Args:
        gene_list: Current population.
        penalty_list: Penalty of each individual, aligned with `gene_list`.

    Returns:
        The two offspring produced by crossover_module().
    """
    # Each parent is chosen by its own, independent tournament.
    mother = choice_parents(gene_list, penalty_list)
    father = choice_parents(gene_list, penalty_list)
    return crossover_module(mother, father)


@jit
def mutation(offspring):
    """Return a mutated copy of *offspring*.

    Every gene is independently replaced, with probability `mutation_rate`,
    by a fresh value drawn uniformly from [0, 1). All other genes are copied
    unchanged, and the input array is not modified.

    The previous implementation had the selection inverted: it started from
    an all-random array and copied the offspring's value only at the
    positions selected with probability `mutation_rate`. About 99% of every
    offspring was therefore replaced by noise, discarding the result of
    selection and crossover.

    Args:
        offspring: 1-D gene array.

    Returns:
        A new array of the same length as `offspring`.
    """
    n_genes = offspring.shape[0]
    mutate_mask = np.random.rand(n_genes) < mutation_rate
    fresh_values = np.random.rand(n_genes)

    # Take the fresh value where the mask is set, the inherited one elsewhere.
    return np.where(mutate_mask, fresh_values, offspring)


def elite(gene_list, penalty_list, next_gene, elite_n):
    """Build the next population from the best individuals plus offspring.

    Args:
        gene_list: Current population.
        penalty_list: Penalty of each individual, aligned with `gene_list`;
            smaller is better.
        next_gene: Offspring to append after the elites. Not modified.
        elite_n: Number of best individuals to carry over.

    Returns:
        A new list: the `elite_n` lowest-penalty individuals (best first,
        ties kept in their original order) followed by the items of
        `next_gene`.

    The previous implementation located each elite with
    ``penalty_list.index(value)``, which always returns the first individual
    having that penalty. When several individuals shared a penalty, the same
    individual was copied repeatedly and the others were dropped.
    """
    # Rank indices rather than values so that tied penalties map to distinct
    # individuals; sorted() is stable, which keeps ties in population order.
    ranked = sorted(range(len(penalty_list)), key=lambda idx: penalty_list[idx])

    survivors = [gene_list[idx] for idx in ranked[:max(elite_n, 0)]]
    survivors.extend(next_gene)
    return survivors


def main():
    """Run the genetic algorithm for `generation` generations.

    In every generation the population is evaluated, the best gene is saved
    with ``np.save("best_weight", ...)`` whenever it beats the best penalty
    seen so far, progress is printed every `print_freq` generations, and the
    next population is assembled from the elites plus `offspring_n` mutated
    crossover children.
    """
    gene_list = init()
    best_penalty = 9999999

    for generation_count in range(generation):
        penalty_list, win_lose_draw_list = eval(gene_list)
        min_penalty = min(penalty_list)
        ave_penalty = sum(penalty_list) / len(penalty_list)
        # First individual that reaches the minimum penalty.
        best_index = penalty_list.index(min_penalty)

        # Persist the gene only when it improves on every earlier generation.
        if min_penalty < best_penalty:
            best_penalty = min_penalty
            np.save("best_weight", gene_list[best_index])

        # print_result
        if generation_count % print_freq == 0:
            best_result = win_lose_draw_list[best_index]

            print(f"generation: {generation_count} || min penalty: {min_penalty} || ave_penalty: {ave_penalty}")
            print(f"best result... win:{best_result['win']}  draw:{best_result['draw']}  lose:{best_result['lose']}  overlap:{best_result['overlap']}")

        # Each crossover yields two children, so half as many rounds are run.
        next_gene = []
        pair_count = int(offspring_n / 2)
        for _ in range(pair_count):
            children = crossover(gene_list, penalty_list)
            next_gene.extend(mutation(child) for child in children)

        elite_count = population - offspring_n
        gene_list = elite(gene_list, penalty_list, next_gene, elite_count)



# Start training only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()