import numpy as np
import copy
import random
import time
from ConsolePlay import TTTConsole
from numba import jit
# Symbolic names for game outcomes mapped to their lowercase string labels.
# The "overlap", "win" and "draw" labels are the strings that calc_penalty()
# and eval() compare against.
# NOTE(review): presumably these mirror the judge strings produced by
# TTTConsole.Run() -- confirm against ConsolePlay.
JUDGE = {
    "None": "none",
    "OUT_OF_RANGE": "out_of_range",
    "OVERLAP": "overlap",
    "WIN": "win",
    "DRAW": "draw",
}
# --- Network layout ---------------------------------------------------------
# Length of the encoded board: 9 cells, each written as a 6-wide slice.
input_vec_length = 54
# Width of the hidden layer.
w1_length = 64
# Number of network outputs; the argmax over them is used as the move.
out_length = 9
# Flat gene = first-layer weights followed by second-layer weights.
gene_length = input_vec_length * w1_length + w1_length * out_length

# --- Genetic-algorithm settings ----------------------------------------------
# Games played per gene in each evaluation.
battle_num = 100
# Number of genes kept in the population.
population = 50
# Children produced per generation; the remaining slots are filled by elites.
offspring_n = 24
# Number of generations to run.
generation = 100000
# Per-gene probability threshold that mutation() draws against.
mutation_rate = 1.0 / 100.0
# Progress is printed every `print_freq` generations.
print_freq = 100
def init():
    """Build the starting population.

    Returns:
        A list of ``population`` NumPy vectors, each holding ``gene_length``
        values drawn uniformly from [0, 1) with ``np.random.rand``.
    """
    return [np.random.rand(gene_length) for _ in range(population)]
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The exponent is negated so that the result increases with ``x``
    (0.5 at 0, approaching 1 for large positive input); without the minus
    sign the function would compute the sigmoid of ``-x``.

    Note: nnTurn_module2 inlines its own activation expression and does not
    call this helper.

    Args:
        x: A scalar or array-like of numbers.

    Returns:
        The element-wise sigmoid, as a NumPy scalar or array.
    """
    return 1 / (1 + np.exp(np.negative(x)))
def RandTurn(board):
    """Pick a uniformly random empty cell of the 9-cell board.

    A cell counts as empty when ``board[pos] == 0``.

    All nine positions (0..8) are candidates. ``np.random.randint`` excludes
    its upper bound, so sampling with ``randint(0, 8)`` would never return
    cell 8 and would loop forever once cell 8 is the only empty one.

    Args:
        board: Indexable of 9 cell values.

    Returns:
        The index (0..8) of the chosen empty cell.

    Raises:
        ValueError: If the board has no empty cell.
    """
    empty_cells = [pos for pos in range(9) if board[pos] == 0]
    if not empty_cells:
        raise ValueError("RandTurn called on a board with no empty cell")
    # randint's upper bound is exclusive, so this covers every candidate.
    return empty_cells[np.random.randint(0, len(empty_cells))]
def calc_penalty(judge, is_me):
    """Return the penalty for one finished game.

    Args:
        judge: Outcome label; one of "overlap", "win" or "draw".
        is_me: Truthy when the outcome is attributed to the evaluated player,
            falsy when it is attributed to the opponent.

    Returns:
        The integer penalty (lower is better for the evaluated player).

    Raises:
        KeyError: If ``judge`` is not one of the three labels above.
    """
    own_outcome = {"overlap": 10000, "win": 0, "draw": 100}
    opponent_outcome = {"overlap": 0, "win": 1000, "draw": 100}
    table = own_outcome if is_me else opponent_outcome
    return table[judge]
@jit
def nnTurn_module1(j, board_j, part_of_vec, input_vec):
    """Encode one board cell into its 6-wide slice of the network input.

    A negative cell value ``v`` sets slot ``v + 3`` of ``part_of_vec``; a
    positive value sets slot ``v + 2``; zero leaves ``part_of_vec`` untouched.
    The 6 values are then written to ``input_vec[6*j : 6*(j+1)]``.
    NOTE(review): presumably cell values lie in -3..-1 and 1..3 so the slots
    stay within 0..5 -- confirm against TTTConsole.

    Args:
        j: Cell index on the board.
        board_j: Value stored in that cell.
        part_of_vec: Length-6 scratch vector; modified in place.
        input_vec: Full input vector; modified in place.

    Returns:
        ``input_vec`` after the slice for cell ``j`` has been written.
    """
    if board_j > 0:
        part_of_vec[board_j + 2] = 1.0
    elif board_j < 0:
        part_of_vec[board_j + 3] = 1.0
    start = int(6 * j)
    stop = int(6 * (j + 1))
    input_vec[start:stop] = part_of_vec
    return input_vec
@jit
def nnTurn_module2(input_vec, w1, w2):
    """Run the two-layer network and return the index of its largest output.

    The input is reshaped to a single row, multiplied by ``w1``, passed
    through the activation ``1 / (1 + exp(x))`` and multiplied by ``w2``.
    Note that this activation is the logistic function of ``-x``.

    Args:
        input_vec: Encoded board vector.
        w1: First-layer weight matrix.
        w2: Second-layer weight matrix.

    Returns:
        ``np.argmax`` over the network outputs.
    """
    row = input_vec.reshape((1, -1))
    hidden_pre = np.dot(row, w1)
    hidden = 1 / (1 + np.exp(hidden_pre))
    scores = np.dot(hidden, w2)
    return np.argmax(scores)
def eval(gene_list):
    """Score every gene by letting its network play against RandTurn.

    Each gene is split into the two weight matrices, wrapped in a move
    function and played for ``battle_num`` games: the first
    ``battle_num // 2`` as player 1, the rest as player 2. Penalties from
    calc_penalty() are summed per gene.

    NOTE: this function shadows the builtin ``eval``; the name is kept so
    existing callers keep working.
    NOTE(review): ``TTTConsole(first, second).Run()`` is assumed to return
    ``(judge, player)`` with ``player`` being 1 or 2 -- confirm against
    ConsolePlay.

    Args:
        gene_list: Iterable of flat weight vectors of length ``gene_length``.

    Returns:
        A tuple ``(penalty_list, win_lose_draw_list)``: the summed penalty per
        gene, and per gene a dict with "win", "lose", "draw" and "overlap"
        counts.
    """
    penalty_list = []
    win_lose_draw_list = []
    first_layer_size = input_vec_length * w1_length
    games_as_first = battle_num // 2

    for gene in gene_list:
        w1 = gene[:first_layer_size].reshape(w1_length, input_vec_length).T
        w2 = gene[first_layer_size:].reshape(out_length, w1_length).T

        # Closes over this iteration's w1/w2; it is only used within the
        # same iteration, so late binding is not an issue.
        def nnTurn(board):
            # np.float64 replaces the np.float alias, which was removed in
            # NumPy 1.24; it is the same dtype.
            input_vec = np.zeros(input_vec_length, dtype=np.float64)
            for j in range(9):
                part_of_vec = np.zeros(6, dtype=np.float64)
                input_vec = nnTurn_module1(j, board[j], part_of_vec, input_vec)
            return nnTurn_module2(input_vec, w1, w2)

        cur_penalty = 0
        win_lose_draw = {
            "win": 0,
            "lose": 0,
            "draw": 0,
            "overlap": 0,
        }
        for i in range(battle_num):
            # First half of the games: the network moves first (player 1);
            # second half: it moves second (player 2).
            if i < games_as_first:
                my_id = 1
                play = TTTConsole(nnTurn, RandTurn)
            else:
                my_id = 2
                play = TTTConsole(RandTurn, nnTurn)
            judge, player = play.Run()
            is_me = player == my_id
            cur_penalty += calc_penalty(judge, is_me)

            if judge == "win" and is_me:
                win_lose_draw["win"] += 1
            elif judge == "draw":
                win_lose_draw["draw"] += 1
            elif judge == "overlap" and is_me:
                win_lose_draw["overlap"] += 1
            else:
                win_lose_draw["lose"] += 1

        penalty_list.append(cur_penalty)
        win_lose_draw_list.append(win_lose_draw)
    return penalty_list, win_lose_draw_list
@jit
def choice_parents_module(population):
    """Draw two random individual indices in ``0 .. population - 1``.

    ``np.random.randint(low, high)`` excludes ``high``, so the upper bound is
    ``population`` itself; passing ``population - 1`` would make the last
    individual impossible to select.

    Args:
        population: Number of individuals to draw from.

    Returns:
        A tuple ``(parent_1_index, parent_2_index)``; the two indices are
        drawn independently and may be equal.
    """
    parent_1_index = np.random.randint(0, population)
    parent_2_index = np.random.randint(0, population)
    return parent_1_index, parent_2_index
def choice_parents(gene_list, fitness):
    """Select one parent by a two-way tournament.

    Two indices are drawn with choice_parents_module(); the individual with
    the lower ``fitness`` value wins, and the first one wins a tie.

    Args:
        gene_list: Sequence of genes.
        fitness: Sequence of per-gene scores where lower is better.

    Returns:
        The winning gene (the same object stored in ``gene_list``).
    """
    first, second = choice_parents_module(population)
    winner = second if fitness[first] > fitness[second] else first
    return gene_list[winner]
@jit
def crossover_module(parent_1, parent_2):
    """Two-point crossover of two genes.

    Two cut points ``lo <= hi`` are drawn in ``0 .. gene_length`` with
    ``random.randint`` (both ends inclusive), and the children are copies of
    the parents with the segment ``[lo:hi]`` exchanged.

    Args:
        parent_1: First parent gene; not modified.
        parent_2: Second parent gene; not modified.

    Returns:
        A tuple ``(offspring_1, offspring_2)`` of new arrays.
    """
    lo = random.randint(0, gene_length)
    hi = random.randint(lo, gene_length)
    child_a = parent_1.copy()
    child_a[lo:hi] = parent_2[lo:hi]
    child_b = parent_2.copy()
    child_b[lo:hi] = parent_1[lo:hi]
    return child_a, child_b
def crossover(gene_list, penalty_list):
    """Select two parents by tournament and cross them.

    Args:
        gene_list: Current population.
        penalty_list: Per-gene penalties, used as the tournament score.

    Returns:
        A pair ``(offspring_1, offspring_2)`` from crossover_module().
    """
    first_parent = choice_parents(gene_list, penalty_list)
    second_parent = choice_parents(gene_list, penalty_list)
    first_child, second_child = crossover_module(first_parent, second_parent)
    return first_child, second_child
@jit
def mutation(offspring):
    """Return a copy of ``offspring`` with a few genes re-randomised.

    Each gene is selected independently with probability ``mutation_rate``;
    selected genes are replaced by fresh uniform [0, 1) values and all other
    genes are kept. The result starts from a copy of ``offspring``: starting
    from a random vector and copying only the selected genes in would instead
    discard almost the whole offspring.

    Args:
        offspring: Gene vector of length ``gene_length``; not modified.

    Returns:
        A new array of the same length.
    """
    target_index = np.where(np.random.rand(gene_length) < mutation_rate)
    random_genes = np.random.rand(gene_length)
    return_array = offspring.copy()
    return_array[target_index] = random_genes[target_index]
    return return_array
def elite(gene_list, penalty_list, next_gene, elite_n):
    """Build the next population from the best genes plus the new offspring.

    Individuals are ranked by ascending penalty using their indices, so
    individuals with equal penalties are each selected once (ties keep their
    original order). Looking positions up with ``penalty_list.index(value)``
    would return the first match every time and duplicate that individual.

    Args:
        gene_list: Current population.
        penalty_list: Penalty of each gene in ``gene_list`` (lower is better).
        next_gene: Offspring to append after the elites; not modified.
        elite_n: Number of best individuals to carry over.

    Returns:
        A new list: the ``elite_n`` best genes followed by ``next_gene``.

    Raises:
        IndexError: If ``elite_n`` exceeds the number of penalties.
    """
    if elite_n > len(penalty_list):
        raise IndexError("elite_n exceeds the number of evaluated genes")
    ranked = sorted(range(len(penalty_list)), key=lambda idx: penalty_list[idx])
    # max() keeps a non-positive elite_n meaning "no elites".
    survivors = [gene_list[idx] for idx in ranked[:max(elite_n, 0)]]
    survivors.extend(next_gene)
    return survivors
def main():
    """Run the genetic algorithm for ``generation`` generations.

    Every generation evaluates the population, saves the best gene to
    "best_weight" with ``np.save`` whenever the minimum penalty improves on
    the best seen so far, prints progress every ``print_freq`` generations,
    and then builds the next population from the elites plus ``offspring_n``
    mutated children.
    """
    gene_list = init()
    next_gene = []
    best_penalty = 9999999
    elite_n = population - offspring_n
    pair_count = int(offspring_n / 2)

    for generation_count in range(generation):
        next_gene.clear()
        penalty_list, win_lose_draw_list = eval(gene_list)
        min_penalty = min(penalty_list)
        ave_penalty = sum(penalty_list) / len(penalty_list)
        # Position of the first individual holding the minimum penalty.
        best_index = penalty_list.index(min_penalty)

        if min_penalty < best_penalty:
            best_penalty = min_penalty
            np.save("best_weight", gene_list[best_index])

        # print_result
        if generation_count % print_freq == 0:
            best_result = win_lose_draw_list[best_index]
            print(f"generation: {generation_count} || min penalty: {min_penalty} || ave_penalty: {ave_penalty}")
            print(f"best result... win:{best_result['win']} draw:{best_result['draw']} lose:{best_result['lose']} overlap:{best_result['overlap']}")

        for _ in range(pair_count):
            child_a, child_b = crossover(gene_list, penalty_list)
            child_a = mutation(child_a)
            child_b = mutation(child_b)
            next_gene.append(child_a)
            next_gene.append(child_b)

        gene_list = elite(gene_list, penalty_list, next_gene, elite_n)


# Run the training loop only when executed as a script.
if __name__ == '__main__':
    main()