diff --git a/__pycache__/GLOBAL.cpython-36.pyc b/__pycache__/GLOBAL.cpython-36.pyc new file mode 100644 index 0000000..105be00 --- /dev/null +++ b/__pycache__/GLOBAL.cpython-36.pyc Binary files differ diff --git a/__pycache__/ImageProcess.cpython-36.pyc b/__pycache__/ImageProcess.cpython-36.pyc new file mode 100644 index 0000000..069b966 --- /dev/null +++ b/__pycache__/ImageProcess.cpython-36.pyc Binary files differ diff --git a/__pycache__/LoadInputData.cpython-36.pyc b/__pycache__/LoadInputData.cpython-36.pyc new file mode 100644 index 0000000..1217d59 --- /dev/null +++ b/__pycache__/LoadInputData.cpython-36.pyc Binary files differ diff --git a/__pycache__/NonlinearFunction.cpython-36.pyc b/__pycache__/NonlinearFunction.cpython-36.pyc new file mode 100644 index 0000000..8e84ccb --- /dev/null +++ b/__pycache__/NonlinearFunction.cpython-36.pyc Binary files differ diff --git a/__pycache__/SignalProcess.cpython-36.pyc b/__pycache__/SignalProcess.cpython-36.pyc new file mode 100644 index 0000000..7b46d2a --- /dev/null +++ b/__pycache__/SignalProcess.cpython-36.pyc Binary files differ diff --git a/__pycache__/UtilProcess.cpython-36.pyc b/__pycache__/UtilProcess.cpython-36.pyc new file mode 100644 index 0000000..4db047d --- /dev/null +++ b/__pycache__/UtilProcess.cpython-36.pyc Binary files differ diff --git a/__pycache__/UtilsGUI.cpython-36.pyc b/__pycache__/UtilsGUI.cpython-36.pyc new file mode 100644 index 0000000..4e29a9a --- /dev/null +++ b/__pycache__/UtilsGUI.cpython-36.pyc Binary files differ diff --git a/__pycache__/rqiCalculation.cpython-36.pyc b/__pycache__/rqiCalculation.cpython-36.pyc new file mode 100644 index 0000000..c50608d --- /dev/null +++ b/__pycache__/rqiCalculation.cpython-36.pyc Binary files differ diff --git a/__pycache__/rrEstimation.cpython-36.pyc b/__pycache__/rrEstimation.cpython-36.pyc new file mode 100644 index 0000000..235fdb3 --- /dev/null +++ b/__pycache__/rrEstimation.cpython-36.pyc Binary files differ diff --git 
a/font/FiraMono-Medium.otf b/font/FiraMono-Medium.otf new file mode 100644 index 0000000..4f208e9 --- /dev/null +++ b/font/FiraMono-Medium.otf Binary files differ diff --git a/font/SIL Open Font License.txt b/font/SIL Open Font License.txt new file mode 100644 index 0000000..285151a --- /dev/null +++ b/font/SIL Open Font License.txt @@ -0,0 +1,45 @@ +Copyright (c) 2014, Mozilla Foundation https://mozilla.org/ with Reserved Font Name Fira Mono. + +Copyright (c) 2014, Telefonica S.A. + +This Font Software is licensed under the SIL Open Font License, Version 1.1. +This license is copied below, and is also available with a FAQ at: http://scripts.sil.org/OFL + +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others. + +The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the copyright statement(s). 
+ +"Original Version" refers to the collection of Font Software components as distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, or substituting -- in part or in whole -- any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment. + +"Author" refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission. 
+ +5) The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. The requirement for fonts to remain under this license does not apply to any document created using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are not met. + +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE. \ No newline at end of file diff --git a/model_data/classes.txt b/model_data/classes.txt new file mode 100644 index 0000000..712856b --- /dev/null +++ b/model_data/classes.txt @@ -0,0 +1,3 @@ +face +mouth +nose \ No newline at end of file diff --git a/model_data/yolo_anchors.txt b/model_data/yolo_anchors.txt new file mode 100644 index 0000000..9cdfb96 --- /dev/null +++ b/model_data/yolo_anchors.txt @@ -0,0 +1 @@ +10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 diff --git a/yoloV3/__pycache__/detect.cpython-36.pyc b/yoloV3/__pycache__/detect.cpython-36.pyc new file mode 100644 index 0000000..0dfcdc7 --- /dev/null +++ b/yoloV3/__pycache__/detect.cpython-36.pyc Binary files differ diff --git a/yoloV3/__pycache__/model.cpython-36.pyc b/yoloV3/__pycache__/model.cpython-36.pyc new file mode 100644 index 0000000..a297a07 --- /dev/null +++ b/yoloV3/__pycache__/model.cpython-36.pyc Binary files differ diff --git a/yoloV3/__pycache__/utils.cpython-36.pyc 
class YOLO(object):
    """YOLOv3-style detector used to locate faces, mouths and noses.

    The instance owns a Keras model plus the TensorFlow session used to run
    it.  Settings come from ``_defaults`` and may be overridden through
    keyword arguments to the constructor.  Boxes are always expressed as
    ``[top, left, bottom, right]`` in pixels of the input image.

    The label font is opened with the relative path
    ``font/FiraMono-Medium.otf``, so methods that draw labels must be called
    with the project root as the working directory.
    """

    _defaults = {
        "model_path": MODEL_PATH,
        "anchors_path": ANCHORS_PATH,
        "classes_path": CLASSES_PATH,
        "score": SCORE,
        "iou": IOU,
        "model_image_size": MODEL_SIZE_IMAGE,
        "gpu_num": GPU_NUM,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting *n*.

        An unknown name does not raise: a message string is returned instead
        (behaviour kept for backward compatibility with existing callers).
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Load class names, anchors and the model.

        :param kwargs: overrides for any key of ``_defaults``
        """
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)  # and update with user overrides
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        """Read the class-name file: one name per line, whitespace stripped."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    def _get_anchors(self):
        """Read the first line of the anchors file as an ``(N, 2)`` array."""
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        anchors = [float(x) for x in first_line.split(',')]
        return np.array(anchors).reshape(-1, 2)

    def generate(self):
        """Build the model and the filtered-detection output tensors.

        :return: the ``(boxes, scores, classes)`` tensors produced by
            ``yolo_eval``
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights_noseonly must be a .h5 file.'

        # Load model, or construct model and load weights_noseonly.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        is_tiny_version = num_anchors == 6  # default setting
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception:
            # The file holds weights only: rebuild the architecture first.
            # ``Exception`` (not a bare except) so Ctrl-C is not swallowed.
            if is_tiny_version:
                self.yolo_model = tiny_yolo_body(
                    Input(shape=(None, None, 3)), num_anchors // 2, num_classes)
            else:
                self.yolo_model = yolo_body(
                    Input(shape=(None, None, 3)), num_anchors // 3, num_classes)
            # Use the expanded path (the original passed the raw setting, so
            # a "~" path failed here); model, anchors and classes must match.
            self.yolo_model.load_weights(model_path)
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes: evenly spaced hues.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2, ))
        if self.gpu_num >= 2:
            self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                                           len(self.class_names), self.input_image_shape,
                                           score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    # ------------------------------------------------------------------
    # Private helpers shared by the detect_* methods
    # ------------------------------------------------------------------
    @staticmethod
    def _load_font(pil_image):
        """Open the label font, sized to about 3% of the image height."""
        return ImageFont.truetype(
            font=r'font/FiraMono-Medium.otf',
            size=np.floor(3e-2 * pil_image.size[1] + 0.5).astype('int32'))

    @staticmethod
    def _line_thickness(pil_image):
        """Return the outline thickness, proportional to the image size."""
        return (pil_image.size[0] + pil_image.size[1]) // 300

    def _run_inference(self, pil_image):
        """Letterbox and normalise *pil_image*, then run the network.

        :return: ``(out_boxes, out_scores, out_classes)`` as returned by the
            session for the tensors created in ``generate``
        """
        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(pil_image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (pil_image.width - (pil_image.width % 32),
                              pil_image.height - (pil_image.height % 32))
            boxed_image = letterbox_image(pil_image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        return self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [pil_image.size[1], pil_image.size[0]],
                K.learning_phase(): 0
            })

    @staticmethod
    def _clip_box(box, image_size):
        """Round *box* to integers and clip it to the image.

        :param box: ``(top, left, bottom, right)``
        :param image_size: PIL ``(width, height)``
        """
        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image_size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image_size[0], np.floor(right + 0.5).astype('int32'))
        return [top, left, bottom, right]

    @staticmethod
    def _draw_box(draw, box, thickness, color):
        """Outline *box* with *thickness* nested one-pixel rectangles."""
        top, left, bottom, right = box
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=color)

    @staticmethod
    def _text_size(draw, label, font):
        """Return ``(width, height)`` of *label*.

        ``ImageDraw.textsize`` is used when the installed Pillow still has
        it; otherwise the size is derived from ``textbbox``.
        """
        if hasattr(draw, 'textsize'):
            return draw.textsize(label, font)
        left, top, right, bottom = draw.textbbox((0, 0), label, font=font)
        return right - left, bottom - top

    def _draw_label(self, draw, label, font, box, color):
        """Draw *label* on a filled *color* background next to *box*.

        The label sits above the box when there is room, otherwise just
        inside its top edge.
        """
        top, left = box[0], box[1]
        label_size = self._text_size(draw, label, font)
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(tuple(text_origin), label, fill=(255, 255, 255), font=font)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def calc_detection_score(self, _out_classes, _out_scores, _out_boxes):
        """Pick one mouth box and one nose box from the raw detections.

        Face detections are ignored.  The detection with the highest
        confidence is selected first; among detections of any *other* class
        the one maximising ``confidence + tanh(distance / height / 2)`` is
        selected, where ``distance`` is the distance between box centres and
        ``height`` the height of the first box.

        :param _out_classes: class index of every detection
        :param _out_scores: confidence of every detection
        :param _out_boxes: ``(top, left, bottom, right)`` of every detection
        :return: ``(mouth_box, mouth_score, nose_box, nose_score)``; a
            missing box is ``None`` with score ``0``
        """
        _out_classes = np.asarray(_out_classes)
        _out_scores = np.asarray(_out_scores)
        _out_boxes = np.asarray(_out_boxes)

        # Drop the face detections.
        keep = np.array(
            [self.class_names[c] != 'face' for c in _out_classes], dtype=bool)
        _out_classes = _out_classes[keep]
        _out_scores = _out_scores[keep]
        _out_boxes = _out_boxes[keep]

        if len(_out_classes) == 0:
            return None, 0, None, 0

        # Bounding box with the highest confidence.
        conf_max_index = np.argmax(_out_scores)
        conf_max_class = self.class_names[_out_classes[conf_max_index]]
        conf_max_box = _out_boxes[conf_max_index]
        conf_max_score = _out_scores[conf_max_index]
        top, left, bottom, right = conf_max_box
        height = abs(top - bottom)
        if height == 0:
            # Degenerate box: avoid a division by zero in the score below.
            height = np.finfo(float).eps
        conf_max_center = [(top + bottom) / 2, (left + right) / 2]

        # For boxes of the other class, score each one from its confidence
        # and its distance to the selected box, keeping the first best one.
        another_box, another_score = None, 0
        for i, c in enumerate(_out_classes):
            if self.class_names[c] == conf_max_class:
                continue
            top, left, bottom, right = _out_boxes[i]
            center = [(top + bottom) / 2, (left + right) / 2]
            distance = np.sqrt((center[0] - conf_max_center[0]) ** 2
                               + (center[1] - conf_max_center[1]) ** 2)
            score = _out_scores[i] + np.tanh(distance / height / 2)
            if another_box is None or score > another_score:
                another_box, another_score = _out_boxes[i], score

        # The class file spells the class "mouth"; the previous comparison
        # against "mouse" never matched, which swapped the two results
        # whenever the mouth was the most confident detection.
        if conf_max_class == 'mouth':
            return conf_max_box, conf_max_score, another_box, another_score
        return another_box, another_score, conf_max_box, conf_max_score

    def detect_face(self, pil_image, pre_box_n=None, pre_box_m=None, display_score=True, display_face=False, ):
        """Detect the face and draw it on *pil_image* (modified in place).

        :param pil_image: Image object (pillow)
        :param pre_box_n: nose box of the previous frame (accepted, unused)
        :param pre_box_m: mouth box of the previous frame (accepted, unused)
        :param display_score: whether to draw the "face <score>" label
        :param display_face: accepted for compatibility, unused
        :return: ``(pil_image, face_box)`` where ``face_box`` is an array
            ``[top, left, bottom, right]``, all zeros when no face was found
        """
        font = self._load_font(pil_image)
        thickness = self._line_thickness(pil_image)
        out_boxes, out_scores, out_classes = self._run_inference(pil_image)

        face_box = [0, 0, 0, 0]
        face_score = 0
        # Iterating in reverse leaves the lowest-index face as the result.
        for i, c in reversed(list(enumerate(out_classes))):
            if self.class_names[c] != 'face':
                continue
            face_score = out_scores[i]
            face_box = self._clip_box(out_boxes[i], pil_image.size)

        draw = ImageDraw.Draw(pil_image)
        if face_box != [0, 0, 0, 0]:
            self._draw_box(draw, face_box, thickness, (255, 0, 0))
            if display_score:
                label = '{} {:.2f}'.format('face', face_score)
                self._draw_label(draw, label, font, face_box, (255, 0, 0))
        del draw
        return pil_image, np.array(face_box)

    def close_session(self):
        """Close the TensorFlow session held by this detector."""
        self.sess.close()

    def detect_revised(self, pil_image, display_score=False, display_face=False, ):
        """Detect mouth and nose with ``calc_detection_score`` and draw them.

        :param pil_image: Image object (pillow), modified in place
        :param display_score: whether to draw the "<class> <score>" labels
        :param display_face: accepted for compatibility, unused
        :return: ``(pil_image, nose_box, mouth_box, face_box)``; a missing
            nose or mouth box is ``None`` and a missing face box is
            ``[0, 0, 0, 0]``
        """
        font = self._load_font(pil_image)
        thickness = self._line_thickness(pil_image)
        out_boxes, out_scores, out_classes = self._run_inference(pil_image)

        # Default so that a frame without a face no longer raises
        # UnboundLocalError at the return statement.
        face_box = [0, 0, 0, 0]
        for i, c in reversed(list(enumerate(out_classes))):
            if self.class_names[c] != 'face':
                continue
            face_box = self._clip_box(out_boxes[i], pil_image.size)

        mouth_box, mouth_score, nose_box, nose_score = self.calc_detection_score(
            out_classes, out_scores, out_boxes)

        draw = ImageDraw.Draw(pil_image)
        # NOTE(review): assumes CLASS_NAMES lists the mouth class first and
        # the nose class second - confirm against GLOBAL.  zip() stops at the
        # shortest sequence, so a longer CLASS_NAMES cannot raise IndexError.
        for class_name, box, score in zip(CLASS_NAMES,
                                          (mouth_box, nose_box),
                                          (mouth_score, nose_score)):
            if box is None:
                continue
            self._draw_box(draw, box, thickness, CLASS_COLORS[class_name])
            if display_score:
                label = '{} {:.2f}'.format(class_name, score)
                self._draw_label(draw, label, font, box, CLASS_COLORS[class_name])

        del draw
        return pil_image, nose_box, mouth_box, face_box

    def detect_fromConf(self, pil_image, display_score=True, display_face=False):
        """Keep the most confident box per class and outline the face.

        :param pil_image: Image object (pillow), modified in place
        :param display_score: accepted for compatibility, unused (no label
            is drawn by this method)
        :param display_face: accepted for compatibility, unused
        :return: ``[pil_image, nose_box, mouth_box, face_box]``; each box is
            an array ``[top, left, bottom, right]``, all zeros when the class
            was not detected
        """
        # No font is loaded here: this method draws no text, and the font
        # was previously opened from a machine-specific absolute path.
        thickness = self._line_thickness(pil_image)
        out_boxes, out_scores, out_classes = self._run_inference(pil_image)

        max_confidence = {"face": 0, "mouth": 0, "nose": 0}
        max_confidence_box = {"face": [0, 0, 0, 0], "mouth": [0, 0, 0, 0],
                              "nose": [0, 0, 0, 0]}

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            if predicted_class not in max_confidence:
                continue  # class this method does not track
            score = out_scores[i]
            if max_confidence[predicted_class] > score:
                continue
            max_confidence[predicted_class] = score
            max_confidence_box[predicted_class] = self._clip_box(
                out_boxes[i], pil_image.size)

        draw = ImageDraw.Draw(pil_image)
        # Only the face is outlined; nose and mouth boxes are just returned.
        for class_name in ["face"]:
            box = max_confidence_box[class_name]
            if box == [0, 0, 0, 0]:
                continue
            self._draw_box(draw, box, thickness, CLASS_COLORS[class_name])

        return [pil_image, np.array(max_confidence_box["nose"]),
                np.array(max_confidence_box["mouth"]), np.array(max_confidence_box["face"])]
resblock_body(x, 256, 8) + x = resblock_body(x, 512, 8) + x = resblock_body(x, 1024, 4) + return x + +def make_last_layers(x, num_filters, out_filters): + '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer''' + x = compose( + DarknetConv2D_BN_Leaky(num_filters, (1,1)), + DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), + DarknetConv2D_BN_Leaky(num_filters, (1,1)), + DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), + DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x) + y = compose( + DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), + DarknetConv2D(out_filters, (1,1)))(x) + return x, y + + +def yolo_body(inputs, num_anchors, num_classes): + """Create YOLO_V3 model CNN body in Keras.""" + darknet = Model(inputs, darknet_body(inputs)) + x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) + + x = compose( + DarknetConv2D_BN_Leaky(256, (1,1)), + UpSampling2D(2))(x) + x = Concatenate()([x,darknet.layers[152].output]) + x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5)) + + x = compose( + DarknetConv2D_BN_Leaky(128, (1,1)), + UpSampling2D(2))(x) + x = Concatenate()([x,darknet.layers[92].output]) + x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5)) + + return Model(inputs, [y1,y2,y3]) + + +def tiny_yolo_body(inputs, num_anchors, num_classes): + '''Create Tiny YOLO_v3 model CNN body in keras.''' + x1 = compose( + DarknetConv2D_BN_Leaky(16, (3,3)), + MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), + DarknetConv2D_BN_Leaky(32, (3,3)), + MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), + DarknetConv2D_BN_Leaky(64, (3,3)), + MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), + DarknetConv2D_BN_Leaky(128, (3,3)), + MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), + DarknetConv2D_BN_Leaky(256, (3,3)))(inputs) + x2 = compose( + MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), + DarknetConv2D_BN_Leaky(512, (3,3)), + MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'), 
+ DarknetConv2D_BN_Leaky(1024, (3,3)), + DarknetConv2D_BN_Leaky(256, (1,1)))(x1) + y1 = compose( + DarknetConv2D_BN_Leaky(512, (3,3)), + DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2) + + x2 = compose( + DarknetConv2D_BN_Leaky(128, (1,1)), + UpSampling2D(2))(x2) + y2 = compose( + Concatenate(), + DarknetConv2D_BN_Leaky(256, (3,3)), + DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1]) + + return Model(inputs, [y1,y2]) + + +def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): + """Convert final layer features to bounding box parameters.""" + num_anchors = len(anchors) + # Reshape to batch, height, width, num_anchors, box_params. + anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) + + grid_shape = K.shape(feats)[1:3] # height, width + grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), + [1, grid_shape[1], 1, 1]) + grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), + [grid_shape[0], 1, 1, 1]) + grid = K.concatenate([grid_x, grid_y]) + grid = K.cast(grid, K.dtype(feats)) + + feats = K.reshape( + feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) + + # Adjust preditions to each spatial grid point and anchor size. 
+ box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) + box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) + box_confidence = K.sigmoid(feats[..., 4:5]) + box_class_probs = K.sigmoid(feats[..., 5:]) + + if calc_loss == True: + return grid, feats, box_xy, box_wh + return box_xy, box_wh, box_confidence, box_class_probs + + +def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): + '''Get corrected boxes''' + box_yx = box_xy[..., ::-1] + box_hw = box_wh[..., ::-1] + input_shape = K.cast(input_shape, K.dtype(box_yx)) + image_shape = K.cast(image_shape, K.dtype(box_yx)) + new_shape = K.round(image_shape * K.min(input_shape/image_shape)) + offset = (input_shape-new_shape)/2./input_shape + scale = input_shape/new_shape + box_yx = (box_yx - offset) * scale + box_hw *= scale + + box_mins = box_yx - (box_hw / 2.) + box_maxes = box_yx + (box_hw / 2.) + boxes = K.concatenate([ + box_mins[..., 0:1], # y_min + box_mins[..., 1:2], # x_min + box_maxes[..., 0:1], # y_max + box_maxes[..., 1:2] # x_max + ]) + + # Scale boxes back to original image shape. 
+ boxes *= K.concatenate([image_shape, image_shape]) + return boxes + + +def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): + '''Process Conv layer output''' + box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, + anchors, num_classes, input_shape) + boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) + boxes = K.reshape(boxes, [-1, 4]) + box_scores = box_confidence * box_class_probs + box_scores = K.reshape(box_scores, [-1, num_classes]) + return boxes, box_scores + + +def yolo_eval(yolo_outputs, + anchors, + num_classes, + image_shape, + max_boxes=20, + score_threshold=.6, + iou_threshold=.5): + """Evaluate YOLO model on given input and return filtered boxes.""" + num_layers = len(yolo_outputs) + anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting + input_shape = K.shape(yolo_outputs[0])[1:3] * 32 + boxes = [] + box_scores = [] + for l in range(num_layers): + _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], + anchors[anchor_mask[l]], num_classes, input_shape, image_shape) + boxes.append(_boxes) + box_scores.append(_box_scores) + boxes = K.concatenate(boxes, axis=0) + box_scores = K.concatenate(box_scores, axis=0) + + mask = box_scores >= score_threshold + max_boxes_tensor = K.constant(max_boxes, dtype='int32') + boxes_ = [] + scores_ = [] + classes_ = [] + for c in range(num_classes): + # TODO: use keras backend instead of tf. 
+ class_boxes = tf.boolean_mask(boxes, mask[:, c]) + class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) + nms_index = tf.image.non_max_suppression( + class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) + class_boxes = K.gather(class_boxes, nms_index) + class_box_scores = K.gather(class_box_scores, nms_index) + classes = K.ones_like(class_box_scores, 'int32') * c + boxes_.append(class_boxes) + scores_.append(class_box_scores) + classes_.append(classes) + boxes_ = K.concatenate(boxes_, axis=0) + scores_ = K.concatenate(scores_, axis=0) + classes_ = K.concatenate(classes_, axis=0) + + return boxes_, scores_, classes_ + + +def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): + '''Preprocess true boxes to training input format + + Parameters + ---------- + true_boxes: array, shape=(m, T, 5) + Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape. + input_shape: array-like, hw, multiples of 32 + anchors: array, shape=(N, 2), wh + num_classes: integer + + Returns + ------- + y_true: list of array, shape like yolo_outputs, xywh are reletive value + + ''' + assert (true_boxes[..., 4]0 + + for b in range(m): + # Discard zero rows. + wh = boxes_wh[b, valid_mask[b]] + if len(wh)==0: continue + # Expand dim to apply broadcasting. + wh = np.expand_dims(wh, -2) + box_maxes = wh / 2. + box_mins = -box_maxes + + intersect_mins = np.maximum(box_mins, anchor_mins) + intersect_maxes = np.minimum(box_maxes, anchor_maxes) + intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 
def box_iou(b1, b2):
    '''Compute the pairwise intersection-over-union of two box sets.

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), boxes as centre x, centre y, width, height
    b2: tensor, shape=(j, 4), same xywh layout

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    def _extent(box):
        # Turn centre/size boxes into min corner, max corner and area.
        centre = box[..., :2]
        size = box[..., 2:4]
        half_size = size/2.
        area = size[..., 0] * size[..., 1]
        return centre - half_size, centre + half_size, area

    # b1 gains an axis before its last one and b2 gains a leading axis, so
    # that every b1 box broadcasts against every b2 box.
    lo_1, hi_1, area_1 = _extent(K.expand_dims(b1, -2))
    lo_2, hi_2, area_2 = _extent(K.expand_dims(b2, 0))

    # Overlap rectangle; a negative extent (disjoint boxes) clamps to zero.
    overlap_wh = K.maximum(K.minimum(hi_1, hi_2) - K.maximum(lo_1, lo_2), 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    union_area = area_1 + area_2 - overlap_area
    return overlap_area / union_area
+ ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) + object_mask_bool = K.cast(object_mask, 'bool') + def loop_body(b, ignore_mask): + true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) + iou = box_iou(pred_box[b], true_box) + best_iou = K.max(iou, axis=-1) + ignore_mask = ignore_mask.write(b, K.cast(best_iou0: + np.random.shuffle(box) + if len(box)>max_boxes: box = box[:max_boxes] + box[:, [0,2]] = box[:, [0,2]]*scale + dx + box[:, [1,3]] = box[:, [1,3]]*scale + dy + box_data[:len(box)] = box + + return image_data, box_data + + # resize image + new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter) + scale = rand(.25, 2) + if new_ar < 1: + nh = int(scale*h) + nw = int(nh*new_ar) + else: + nw = int(scale*w) + nh = int(nw/new_ar) + image = image.resize((nw,nh), Image.BICUBIC) + + # place image + dx = int(rand(0, w-nw)) + dy = int(rand(0, h-nh)) + new_image = Image.new('RGB', (w,h), (128,128,128)) + new_image.paste(image, (dx, dy)) + image = new_image + + # flip image or not + flip = rand()<.5 + if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) + + # distort image + hue = rand(-hue, hue) + sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat) + val = rand(1, val) if rand()<.5 else 1/rand(1, val) + x = rgb_to_hsv(np.array(image)/255.) 
+ x[..., 0] += hue + x[..., 0][x[..., 0]>1] -= 1 + x[..., 0][x[..., 0]<0] += 1 + x[..., 1] *= sat + x[..., 2] *= val + x[x>1] = 1 + x[x<0] = 0 + image_data = hsv_to_rgb(x) # numpy array, 0 to 1 + + # correct boxes + box_data = np.zeros((max_boxes,5)) + if len(box)>0: + np.random.shuffle(box) + box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx + box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy + if flip: box[:, [0,2]] = w - box[:, [2,0]] + box[:, 0:2][box[:, 0:2]<0] = 0 + box[:, 2][box[:, 2]>w] = w + box[:, 3][box[:, 3]>h] = h + box_w = box[:, 2] - box[:, 0] + box_h = box[:, 3] - box[:, 1] + box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box + if len(box)>max_boxes: box = box[:max_boxes] + box_data[:len(box)] = box + + return image_data, box_data