diff --git a/main.py b/main.py index d492f40..db0b7ff 100644 --- a/main.py +++ b/main.py @@ -60,29 +60,60 @@ return None -def is_point_in_polygon(point, polygon): +def draw_polygon_and_detection(image, polygon_vertices, stethoscope_x, stethoscope_y): """ - Ray-Castingアルゴリズムを使用して、点が多角形の内部にあるかどうかを判定する + 5角形の領域と検出された聴診器位置を描画する + """ + # 画像のコピーを作成 + overlay = image.copy() - Args: - point: テストする点 [x, y] - polygon: 多角形の頂点リスト [[x1, y1], [x2, y2], ...] + # 5角形を描画 + vertices = polygon_vertices.astype(np.int32) + cv2.polylines(overlay, [vertices], True, (0, 255, 0), 2) # 緑色の線で5角形を描画 - Returns: - bool: 点が多角形の内部にある場合はTrue + # 検出された聴診器位置を描画(存在する場合) + if stethoscope_x is not None and stethoscope_y is not None: + center = (int(stethoscope_x), int(stethoscope_y)) + cv2.circle(overlay, center, 10, (255, 0, 0), -1) # 青色の円で検出位置を描画 + cv2.circle(overlay, center, 12, (255, 255, 255), 2) # 白い縁取り + + return overlay + + +def expand_points(p1, p2): + """ + 2点間の中点を変えずに距離を2倍に拡張する + """ + mid_x = (p1[0] + p2[0]) / 2 + mid_y = (p1[1] + p2[1]) / 2 + + # 各点から中点へのベクトルを計算 + vec_x = p1[0] - mid_x + vec_y = p1[1] - mid_y + + # ベクトルを2倍に拡張 + new_p1 = [mid_x + vec_x * 2, mid_y + vec_y * 2] + new_p2 = [mid_x - vec_x * 2, mid_y - vec_y * 2] + + return np.array(new_p1), np.array(new_p2) + + +def point_in_polygon(point, vertices): + """ + 点が多角形の内部にあるかどうかを判定する """ x, y = point - n = len(polygon) + n = len(vertices) inside = False j = n - 1 for i in range(n): - if ((polygon[i][1] > y) != (polygon[j][1] > y)) and ( + if (vertices[i][1] > y) != (vertices[j][1] > y) and ( x - < (polygon[j][0] - polygon[i][0]) - * (y - polygon[i][1]) - / (polygon[j][1] - polygon[i][1]) - + polygon[i][0] + < (vertices[j][0] - vertices[i][0]) + * (y - vertices[i][1]) + / (vertices[j][1] - vertices[i][1]) + + vertices[i][0] ): inside = not inside j = i @@ -90,17 +121,14 @@ return inside -def yolox_detector_inference(frame, yolox_inferencer, landmarks=None, score_thr=0.3): +def yolox_detector_inference(frame, yolox_inferencer, pose_keypoints, score_thr=0.3): """ - YOLOXを使用して聴診器を検出し、ランドマークで形成されるポリゴン内の座標を返す + YOLOXで聴診器を検出し、指定された体のポリゴン内の検出結果のみを返す Args: frame: 入力画像 yolox_inferencer: YOLOXの推論モデル - landmarks: [[left_shoulder_x, left_shoulder_y], - [right_shoulder_x, right_shoulder_y], - [left_hip_x, left_hip_y], - [right_hip_x, right_hip_y]] + pose_keypoints: RTMPoseで検出したキーポイント score_thr: 検出スコアの閾値 Returns: @@ -118,26 +146,42 @@ stethoscope_y = None max_score = -1 - # ランドマークが与えられている場合、ポリゴンを形成 - if landmarks is not None: - polygon = landmarks - else: - # ランドマークがない場合は画像全体を対象とする - h, w = frame.shape[:2] - polygon = [[0, 0], [w, 0], [w, h], [0, h]] + # キーポイントから5角形の頂点を生成 + nose = pose_keypoints[0] + left_shoulder = pose_keypoints[5] + right_shoulder = pose_keypoints[6] + left_hip = pose_keypoints[11] + right_hip = pose_keypoints[12] + # 肩と腰の座標を拡張 + expanded_left_shoulder, expanded_right_shoulder = expand_points( + left_shoulder, right_shoulder + ) + expanded_left_hip, expanded_right_hip = expand_points(left_hip, right_hip) + + # 5角形の頂点を定義 + polygon_vertices = np.array( + [ + nose, + expanded_left_shoulder, + expanded_left_hip, + expanded_right_hip, + expanded_right_shoulder, + ] + ) + + # 各検出結果に対して処理 for i, (label, score) in enumerate( zip(predictions["labels"], predictions["scores"]) ): if score >= score_thr and label == 0: # label 0 は聴診器を示す bbox = predictions["bboxes"][i] - - # バウンディングボックスの中心点を計算 + # 中心座標を計算 center_x = (bbox[0] + bbox[2]) / 2 center_y = (bbox[1] + bbox[3]) / 2 - # 中心点がポリゴン内にあるかチェック - if is_point_in_polygon([center_x, center_y], polygon): + # 中心点が5角形の内部にあるか確認 + if point_in_polygon([center_x, center_y], polygon_vertices): if score > max_score: stethoscope_x = center_x stethoscope_y = center_y @@ -158,6 +202,11 @@ stethoscope_overlay_img, cv2.COLOR_RGB2BGR ) + # ポリゴンと検出位置を描画 + stethoscope_overlay_img = draw_polygon_and_detection( + stethoscope_overlay_img, polygon_vertices, stethoscope_x, stethoscope_y + ) + return stethoscope_overlay_img, stethoscope_x, stethoscope_y @@ -275,7 +324,8 @@ left_hip = landmarks[2] right_hip = landmarks[3] - elif RTMPOSE_ENABLED: + # process_images関数内のRTMPOSE_ENABLEDの部分を修正 + if RTMPOSE_ENABLED: det_result = inference_detector(detector, frame) pred_instance = det_result.pred_instances.cpu().numpy() bboxes = np.concatenate( @@ -287,18 +337,20 @@ bboxes = bboxes[nms(bboxes, 0.3), :4] pose_results = inference_topdown(pose_estimator, frame, bboxes) data_samples = merge_data_samples(pose_results) - keypoints = extract_keypoints_rtmpose(pose_results) - if keypoints is None: + pose_keypoints = extract_keypoints_rtmpose(pose_results) + if pose_keypoints is None: print(f"Failed to extract keypoints for image: {image_path}") continue - """ left_shoulder = keypoints[5] - right_shoulder = keypoints[6] - left_hip = keypoints[11] - right_hip = keypoints[12] テレコ確認""" - left_shoulder = keypoints[6] - right_shoulder = keypoints[5] - left_hip = keypoints[12] - right_hip = keypoints[11] + + """ left_shoulder = pose_keypoints[5] + right_shoulder = pose_keypoints[6] + left_hip = pose_keypoints[11] + right_hip = pose_keypoints[12] テレコ確認""" + left_shoulder = pose_keypoints[6] + right_shoulder = pose_keypoints[5] + left_hip = pose_keypoints[12] + right_hip = pose_keypoints[11] + if visualizer is not None: visualizer.add_datasample( "result", @@ -314,28 +366,22 @@ kpt_thr=0.3, ) pose_overlay_img = visualizer.get_image() - else: - print( - "No pose estimation method enabled. Please enable either PoseNet or RTMPose." - ) - continue + + # YOLOXの部分を修正 + yolox_inferencer = init_yolox() if MobileNetV1SSD_ENABLED: stethoscope_overlay_img, stethoscope_x, stethoscope_y = ears_ai.ssd_detect( frame, None ) - yolox_inferencer = init_yolox() - - if YOLOX_ENABLED: - landmarks = [ - [left_shoulder[0], left_shoulder[1]], - [right_shoulder[0], right_shoulder[1]], - [left_hip[0], left_hip[1]], - [right_hip[0], right_hip[1]], - ] + if YOLOX_ENABLED and pose_keypoints is not None: stethoscope_overlay_img, stethoscope_x, stethoscope_y = ( - yolox_detector_inference(frame, yolox_inferencer, landmarks=landmarks) + yolox_detector_inference( + frame, + yolox_inferencer, + pose_keypoints, + ) ) cv2.imwrite(