diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,21 @@
-FROM python:3.8-slim
+FROM openvino/ubuntu20_dev_no_samples
+# apt-get below needs root.
+USER root
 
 COPY ./requirements.txt ./
 RUN apt-get update && \
     apt-get upgrade -y && \
-    apt-get install -y libgl1-mesa-dev \
-    libglib2.0-0 && \
-    pip3 install --upgrade pip setuptools && \
-    pip3 install --upgrade wheel && \
-    pip3 install -r requirements.txt && \
-    pip3 cache purge
+    apt-get install -y --no-install-recommends \
+        libgl1-mesa-dev \
+        libglib2.0-0 \
+        libsm6 \
+        libxext6 \
+        libxrender-dev && \
+    rm -rf /var/lib/apt/lists/*
+RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
+    pip3 install --no-cache-dir -r requirements.txt
 
+# Keep Qt from using the MIT-SHM X11 extension when drawing to the mounted host X socket.
+ENV QT_X11_NO_MITSHM=1
 WORKDIR /code
-ADD . /code/
+COPY . /code/
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -1,2 +1,18 @@
 DetectWhiteLinesByYOLOv5
 ===============
+
+Build the image:
+
+```sh
+docker image build -t openvino-yolov5 .
+```
+
+Allow local clients to reach the host X server, then start the container with the webcam attached:
+
+```sh
+xhost +local:
+docker run -it --rm \
+  --device /dev/video0:/dev/video0:mwr \
+  -v /tmp/.X11-unix:/tmp/.X11-unix \
+  -e DISPLAY=$DISPLAY openvino-yolov5
+```
diff --git a/main.py b/main.py
--- a/main.py
+++ b/main.py
@@ -1,16 +1,144 @@
+from timeit import default_timer as timer
+
 import cv2
+import numpy as np
+from openvino.inference_engine import IECore
+import torch
+
+from utils.general import non_max_suppression, scale_coords
+from utils.plots import Annotator
+
+WEBCAM_PORT = 0
+IMG_SIZE = 640
+CONF_THRESH = 0.6
+write_fps = True
+MODEL_XML = r"yolov5s_640x640_opt.xml"
+MODEL_WEIGHTS = r"yolov5s_640x640_opt.bin"
+
+
+def convert_to_yolov5format_img(frame):
+    """Pad a frame to a square and resize it to the YOLOv5 input size.
+
+    The frame is centred on a square canvas whose side is the longer frame
+    edge, the borders are filled with gray (114), and the canvas is resized to
+    IMG_SIZE x IMG_SIZE with bicubic interpolation.
+
+    Args:
+        frame: image array of shape (height, width, 3).
+
+    Returns:
+        A new (IMG_SIZE, IMG_SIZE, 3) uint8 array; `frame` is not modified.
+    """
+    height, width = frame.shape[:2]
+    size = max(height, width)
+    # Offsets that centre the frame on the square canvas; one of them is 0.
+    top = (size - height) // 2
+    left = (size - width) // 2
+
+    canvas = np.full((size, size, 3), 114, np.uint8)
+    canvas[top:top + height, left:left + width] = frame
+    return cv2.resize(canvas, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
+
+
+class DispFps:
+    """Overlay a running frame counter and a once-per-second FPS reading on frames."""
+
+    def __init__(self):
+        # Overlay box geometry and text style.
+        self.__width = 160
+        self.__height = 40
+        self.__font_size = 1.0
+        self.__font_width = 1
+        self.__font_style = cv2.FONT_HERSHEY_COMPLEX
+        self.__font_color = (255, 255, 255)
+        self.__background_color = (0, 0, 0)
+
+        # Total number of frames seen so far.
+        self.__frame_count = 0
+
+        # FPS bookkeeping.
+        self.__accum_time = 0
+        self.__curr_fps = 0
+        self.__prev_time = timer()
+        self.__str = "FPS: "
+
+    def __calc(self):
+        """Advance the frame counter and refresh the FPS text about once per second."""
+        self.__frame_count += 1
+
+        now = timer()
+        self.__accum_time += now - self.__prev_time
+        self.__prev_time = now
+        self.__curr_fps += 1
+        if self.__accum_time > 1:
+            # Carry the overshoot into the next one-second window.
+            self.__accum_time -= 1
+            self.__str = "FPS: " + str(self.__curr_fps)
+            self.__curr_fps = 0
+
+    def __disp(self, frame, text, x1, y1, x2, y2):
+        """Draw `text` on a filled box spanning (x1, y1)-(x2, y2) of `frame`, in place."""
+        cv2.rectangle(frame, (x1, y1), (x2, y2), self.__background_color, -1)
+        cv2.putText(frame, text, (x1 + 5, y2 - 5), self.__font_style, self.__font_size,
+                    self.__font_color, self.__font_width)
+
+    def disp(self, frame):
+        """Update the counters and draw them onto `frame` in place."""
+        self.__calc()
+        # Frame count goes in the top-left corner.
+        self.__disp(frame, str(self.__frame_count), 0, 0, x2=self.__width, y2=self.__height)
+        # FPS goes in the top-right corner.
+        screen_width = int(frame.shape[1])
+        self.__disp(frame, self.__str, screen_width - self.__width, 0, screen_width, self.__height)
 
 
 def main():
+    """Run YOLOv5 white-line detection on webcam frames; press q to quit."""
+    ie = IECore()
+    net = ie.read_network(model=MODEL_XML, weights=MODEL_WEIGHTS)
+
+    input_layer = next(iter(net.input_info))
+    print(f"input layout: {net.input_info[input_layer].layout}")
+    print(f"input precision: {net.input_info[input_layer].precision}")
+    print(f"input shape: {net.input_info[input_layer].tensor_desc.dims}")
+
+    config = {"CPU_THREADS_NUM": "8"}
+    exec_net = ie.load_network(network=net, device_name="CPU", config=config)
+
-    cap = cv2.VideoCapture(0)
+    cap = cv2.VideoCapture(WEBCAM_PORT)
+    dispFps = DispFps()
     while True:
         ret, frame = cap.read()
+        if not ret:
             break
+        src = convert_to_yolov5format_img(frame)
 
-        cv2.imshow("frame", frame)
+        # HWC -> CHW, reverse the channel axis (BGR -> RGB), add a batch axis, scale to [0, 1].
+        input_data = np.expand_dims(np.transpose(src, (2, 0, 1))[::-1], 0).astype(np.float32) / 255.0
+
+        result = torch.tensor(exec_net.infer({input_layer: input_data})["output"])
+        # NMS. The positional arguments after CONF_THRESH are presumably iou_thres,
+        # classes and agnostic as in upstream YOLOv5 - confirm against utils/general.py.
+        pred = non_max_suppression(result, CONF_THRESH, 0.45, 0, False, max_det=10)
+        annotator = Annotator(src, line_width=3, example=str(["white_line"]))
+        for det in pred:
+            if len(det):
+                # Rescale boxes from the network input size to the displayed image size.
+                det[:, :4] = scale_coords(input_data.shape[2:], det[:, :4], src.shape).round()
+
+                for *xyxy, _conf, _cls in reversed(det):
+                    annotator.box_label(xyxy, "white_line")
+        if write_fps:
+            dispFps.disp(src)
+
+        cv2.imshow("detect", src)
+        key = cv2.waitKey(1)
+        if key == ord("q"):
             break
+    cap.release()
+    cv2.destroyAllWindows()
diff --git a/requirements.txt b/requirements.txt
index dc90287..a36b686 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,20 @@
-numpy==1.22.2
-opencv-python==4.5.5.62
\ No newline at end of file
+cycler==0.11.0
+fonttools==4.29.1
+kiwisolver==1.3.2
+matplotlib==3.5.1
+numpy==1.19.5
+opencv-python==4.5.5.62
+openvino==2021.4.2
+packaging==21.3
+pandas==1.4.0
+Pillow==9.0.1
+pyparsing==3.0.7
+python-dateutil==2.8.2
+pytz==2021.3
+PyYAML==6.0
+scipy==1.8.0
+seaborn==0.11.2
+six==1.16.0
+torch==1.10.2
+torchvision==0.11.3
+typing-extensions==4.0.1
diff --git a/utils/flask_rest_api/__init__.py b/utils/flask_rest_api/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/utils/flask_rest_api/__init__.py