diff --git a/config.py b/config.py index 83cfdf0..8a19625 100644 --- a/config.py +++ b/config.py @@ -18,10 +18,10 @@ CATBOOST_ENABLED = False NGBOOST_ENABLED = False NORMALIZE_ENABLED = True -POSENET_ENABLED = True -RTMPOSE_ENABLED = False -MOBILENETV1SSD_ENABLED = True -YOLOX_ENABLED = False +POSENET_ENABLED = False +RTMPOSE_ENABLED = True +MOBILENETV1SSD_ENABLED = False +YOLOX_ENABLED = True EARSNET_CROP_ENABLED = True # Neural network model settings diff --git a/main.py b/main.py index 4ef530c..1387149 100644 --- a/main.py +++ b/main.py @@ -48,7 +48,7 @@ NGBOOST_ENABLED = config.NGBOOST_ENABLED POSENET_ENABLED = config.POSENET_ENABLED RTMPOSE_ENABLED = config.RTMPOSE_ENABLED -MobileNetV1SSD_ENABLED = config.MOBILENETV1SSD_ENABLED +MOBILENETV1SSD_ENABLED = config.MOBILENETV1SSD_ENABLED # ここを True にするとSSD使う YOLOX_ENABLED = config.YOLOX_ENABLED EARSNET_ENABLED = config.EARSNET_ENABLED EARSNET_CROP_ENABLED = config.EARSNET_CROP_ENABLED @@ -64,6 +64,9 @@ fps_history = [] +############################################################################### +# FPS監視スレッド +############################################################################### def fps_monitor(interval=1.0): """推論処理完了したフレーム数を定期的に見てFPSを算出する。描画時間は含まない。""" global processed_frames, stop_fps_thread, fps_history @@ -94,7 +97,7 @@ ############################################################################### -# モデルロード系 +# モデルロード系 (LightGBM/XGBoost など) ############################################################################### def load_model(model_path, model_type="lgb"): with open(model_path, "rb") as model_file: @@ -107,7 +110,7 @@ ############################################################################### -# YOLOX +# YOLOX 初期化 ############################################################################### def init_yolox(): try: @@ -122,20 +125,31 @@ } yolox_inferencer = DetInferencer(**init_args) return yolox_inferencer - except Exception as e: print(f"Error initializing YOLOX: {str(e)}") return None ############################################################################### +# SSD検出 (MobileNetV1 SSD) +############################################################################### +def ssd_detector_inference(frame, ears_ai): + """ + MobileNetV1 SSD 用の推論。 + ears_ai.ssd_detect(frame, None) → (overlay_img, x, y) + overlay_img: 推論の可視化結果 + x, y: 聴診器中心座標 (未検出時は 0, 0) + """ + stethoscope_overlay_img, stethoscope_x, stethoscope_y = ears_ai.ssd_detect( + frame, None + ) + return stethoscope_overlay_img, stethoscope_x, stethoscope_y + + +############################################################################### # Pillow-based drawing helpers ############################################################################### def pillow_draw_circle(draw, center, radius, fill=None, outline=None, width=1): - """ - 円を描画するユーティリティ。 - fill で塗りつぶし色、outline + width で外枠を指定可能。 - """ x, y = int(center[0]), int(center[1]) left_up = (x - radius, y - radius) right_down = (x + radius, y + radius) @@ -143,21 +157,14 @@ def draw_glow_marker(draw, center, main_color, radius=5): - """ - 光彩風に見えるように、やや大きめの枠 + 中心塗りつぶし を描画。 - main_color: (R, G, B) - radius: 中心の塗りつぶし半径 - """ - # 外側の枠(光彩用):radius+3くらいにして薄い色 or 白枠など outer_radius = radius + 3 x, y = int(center[0]), int(center[1]) - - # 1) 白枠で大きめの円 + # 白枠 pillow_draw_circle( draw, (x, y), outer_radius, fill=None, outline=(255, 255, 255), width=2 ) - # 2) 中心を main_color で塗りつぶし - pillow_draw_circle(draw, (x, y), radius, fill=main_color, outline=None, width=0) + # 中心塗りつぶし + pillow_draw_circle(draw, (x, y), radius, fill=main_color) def pillow_draw_polygon(draw, vertices, outline=(0, 255, 0), width=2): @@ -261,10 +268,8 @@ def expand_points(p1, p2): mid_x = (p1[0] + p2[0]) / 2 mid_y = (p1[1] + p2[1]) / 2 - vec_x = p1[0] - mid_x vec_y = p1[1] - mid_y - new_p1 = [mid_x + vec_x * 2, mid_y + vec_y * 2] new_p2 = [mid_x - vec_x * 2, mid_y - vec_y * 2] return np.array(new_p1), np.array(new_p2) @@ -322,6 +327,9 @@ return np.arctan2(vector[1], vector[0]) +############################################################################### +# 動画→フレーム +############################################################################### def video_to_frames(video_path, output_dir): os.makedirs(output_dir, exist_ok=True) video = cv2.VideoCapture(video_path) @@ -390,7 +398,7 @@ ############################################################################### -# メイン処理 (推論 & 座標計算のみ) -> FPS計測対象 +# メイン処理 (推論 & 座標計算) ############################################################################### def process_images(args, detector, pose_estimator, visualizer): global processed_frames @@ -419,13 +427,16 @@ rows = [] normalized_rows = [] + # YOLOX 初期化 yolox_inferencer = None if YOLOX_ENABLED: yolox_inferencer = init_yolox() + # 時間計測用 dict timings = { "rtmpose_single": [], "yolox_single": [], + "ssd_single": [], # SSD 用 追加 "conv_single": [], "lightgbm_single": [], "xgboost_single": [], @@ -434,11 +445,14 @@ "pipeline_rtmpose_yolox_conv": [], "pipeline_rtmpose_yolox_lightgbm": [], "pipeline_rtmpose_yolox_xgboost": [], + "pipeline_rtmpose_ssd_conv": [], # SSD 用 追加 + "pipeline_rtmpose_ssd_lightgbm": [], # SSD 用 追加 + "pipeline_rtmpose_ssd_xgboost": [], # SSD 用 追加 "pipeline_earsnet": [], "pipeline_earsnet_cropped": [], } - # モデルロード + # モデルの事前ロード (LightGBM/XGBoost など) if LIGHTGBM_ENABLED: lgb_model_x = load_model("./models/LightGBM/stethoscope_calc_x_best_model.pkl") lgb_model_y = load_model("./models/LightGBM/stethoscope_calc_y_best_model.pkl") @@ -504,8 +518,10 @@ print(f"Failed to load image: {image_path}") continue - # (A) RTMPose + # ============= (A) RTMpose or PoseNet で人体キーポイント ============= rtmpose_time = 0.0 + pose_keypoints = None + if RTMPOSE_ENABLED: start_time_rtmpose = time.time() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) @@ -533,6 +549,7 @@ processed_frames += 1 continue + # 可視化 if visualizer is not None: visualizer.add_datasample( "result", @@ -565,80 +582,65 @@ rtmpose_time = end_time_rtmpose - start_time_rtmpose timings["rtmpose_single"].append(rtmpose_time) - left_shoulder = landmarks[1] - right_shoulder = landmarks[0] - left_hip = landmarks[3] - right_hip = landmarks[2] + # landmarks = [right_shoulder, left_shoulder, right_hip, left_hip] (例) + # ここは利用環境に合わせて入れ替えてください + left_shoulder = (landmarks[0][1], landmarks[0][0]) + right_shoulder = (landmarks[1][1], landmarks[1][0]) + left_hip = (landmarks[2][1], landmarks[2][0]) + right_hip = (landmarks[3][1], landmarks[3][0]) cv2.imwrite( os.path.join(pose_overlay_dir, image_file_name), pose_overlay_img ) + else: + # RTMPose/PoseNetどちらも無効 → 0固定 left_shoulder = (0, 0) right_shoulder = (0, 0) left_hip = (0, 0) right_hip = (0, 0) - # (B) YOLOX - yolox_time = 0.0 + # ============= (B) 聴診器検出: YOLOX or SSD ============= stethoscope_x, stethoscope_y = 0, 0 + detection_time = 0.0 + + # 1) YOLOX if YOLOX_ENABLED: - if ( - RTMPOSE_ENABLED - and "pose_keypoints" in locals() - and pose_keypoints is not None - ): - start_time_yolox = time.time() - stethoscope_overlay_img, stethoscope_x, stethoscope_y = ( - yolox_detector_inference(frame, yolox_inferencer, pose_keypoints) + if pose_keypoints is not None: + start_t = time.time() + from_yolox_img, stethoscope_x, stethoscope_y = yolox_detector_inference( + frame, yolox_inferencer, pose_keypoints ) - end_time_yolox = time.time() - yolox_time = end_time_yolox - start_time_yolox - timings["yolox_single"].append(yolox_time) + end_t = time.time() + detection_time = end_t - start_t + timings["yolox_single"].append(detection_time) + # 可視化 cv2.imwrite( os.path.join(stethoscope_overlay_dir, image_file_name), - stethoscope_overlay_img, - ) - elif POSENET_ENABLED: - pose_keypoints_pose_net = [[0, 0]] * 13 - pose_keypoints_pose_net[5] = (left_shoulder[0], left_shoulder[1]) - pose_keypoints_pose_net[6] = (right_shoulder[0], right_shoulder[1]) - pose_keypoints_pose_net[11] = (left_hip[0], left_hip[1]) - pose_keypoints_pose_net[12] = (right_hip[0], right_hip[1]) - - start_time_yolox = time.time() - stethoscope_overlay_img, stethoscope_x, stethoscope_y = ( - yolox_detector_inference( - frame, yolox_inferencer, pose_keypoints_pose_net - ) - ) - end_time_yolox = time.time() - yolox_time = end_time_yolox - start_time_yolox - timings["yolox_single"].append(yolox_time) - - cv2.imwrite( - os.path.join(stethoscope_overlay_dir, image_file_name), - stethoscope_overlay_img, + from_yolox_img, ) - detection_time_rtmpose_yolox = rtmpose_time + yolox_time + # 2) SSD (MobileNetV1) + elif MOBILENETV1SSD_ENABLED: + start_t = time.time() + from_ssd_img, stethoscope_x, stethoscope_y = ssd_detector_inference( + frame, ears_ai + ) + end_t = time.time() + detection_time = end_t - start_t + timings["ssd_single"].append(detection_time) - row = { - "image_file_name": image_file_name, - "left_shoulder_x": left_shoulder[0], - "left_shoulder_y": left_shoulder[1], - "right_shoulder_x": right_shoulder[0], - "right_shoulder_y": right_shoulder[1], - "left_hip_x": left_hip[0], - "left_hip_y": left_hip[1], - "right_hip_x": right_hip[0], - "right_hip_y": right_hip[1], - "stethoscope_x": stethoscope_x, - "stethoscope_y": stethoscope_y, - } + # 可視化 + cv2.imwrite( + os.path.join(stethoscope_overlay_dir, image_file_name), + from_ssd_img, + ) - # (C) EARSNet (単体) + # Pipeline で合計検出時間 + detection_time_rtmpose_detector = rtmpose_time + detection_time + + # ============= (C) EARSNet (単体) ============= if EARSNET_ENABLED: start_time_earsnet = time.time() earsnet_x, earsnet_y = earsnet_predictor.predict(image_path) @@ -647,10 +649,10 @@ timings["earsnet_single"].append(earsnet_time) timings["pipeline_earsnet"].append(earsnet_time) - row["earsnet_stethoscope_x"] = earsnet_x - row["earsnet_stethoscope_y"] = earsnet_y + else: + earsnet_x, earsnet_y = 0, 0 - # (D) EARSNet (クロップ) + # ============= (D) EARSNet (クロップ) ============= if EARSNET_CROP_ENABLED: cropped_img, (crop_xmin, crop_ymin) = crop_body_from_keypoints( frame, left_shoulder, right_shoulder, left_hip, right_hip @@ -669,10 +671,29 @@ pipeline_earsnet_cropped_time = rtmpose_time + earsnet_cropped_time timings["pipeline_earsnet_cropped"].append(pipeline_earsnet_cropped_time) - row["earsnet_crop_stethoscope_x"] = earsnet_cropped_x - row["earsnet_crop_stethoscope_y"] = earsnet_cropped_y + else: + earsnet_cropped_x, earsnet_cropped_y = 0, 0 - # (E) 正規化 + # ============= (E) リザルト保存用 row を組み立て ============= + row = { + "image_file_name": image_file_name, + "left_shoulder_x": left_shoulder[0], + "left_shoulder_y": left_shoulder[1], + "right_shoulder_x": right_shoulder[0], + "right_shoulder_y": right_shoulder[1], + "left_hip_x": left_hip[0], + "left_hip_y": left_hip[1], + "right_hip_x": right_hip[0], + "right_hip_y": right_hip[1], + "stethoscope_x": stethoscope_x, + "stethoscope_y": stethoscope_y, + "earsnet_stethoscope_x": earsnet_x, + "earsnet_stethoscope_y": earsnet_y, + "earsnet_crop_stethoscope_x": earsnet_cropped_x, + "earsnet_crop_stethoscope_y": earsnet_cropped_y, + } + + # ============= (F) 正規化 ============= source_points = np.array( [ [float(row["left_shoulder_x"]), float(row["left_shoulder_y"])], @@ -682,12 +703,11 @@ ], dtype=np.float32, ) - stethoscope_point = np.array( - [float(row["stethoscope_x"]), float(row["stethoscope_y"])] - ) + stethoscope_point = np.array([row["stethoscope_x"], row["stethoscope_y"]]) normalized_points = normalize_quadrilateral_with_point( source_points.flatten(), stethoscope_point ) + normalized_row = { "image_file_name": image_file_name, "left_shoulder_x": normalized_points[0, 0], @@ -702,86 +722,156 @@ "stethoscope_y": normalized_points[4, 1], } - # EARSNet の Normalized + # EARSNet ノーマライズ (通常/クロップ) if EARSNET_ENABLED: - stetho_point_earsnet = np.array( - [ - float(row.get("earsnet_stethoscope_x", 0)), - float(row.get("earsnet_stethoscope_y", 0)), - ] + p_earsnet = np.array( + [row["earsnet_stethoscope_x"], row["earsnet_stethoscope_y"]] ) norm_earsnet = normalize_quadrilateral_with_point( - source_points.flatten(), stetho_point_earsnet + source_points.flatten(), p_earsnet ) normalized_row["earsnet_stethoscope_x"] = norm_earsnet[4, 0] normalized_row["earsnet_stethoscope_y"] = norm_earsnet[4, 1] if EARSNET_CROP_ENABLED: - stetho_point_crop = np.array( - [ - float(row.get("earsnet_crop_stethoscope_x", 0)), - float(row.get("earsnet_crop_stethoscope_y", 0)), - ] + p_earsnet_crop = np.array( + [row["earsnet_crop_stethoscope_x"], row["earsnet_crop_stethoscope_y"]] ) norm_earsnet_crop = normalize_quadrilateral_with_point( - source_points.flatten(), stetho_point_crop + source_points.flatten(), p_earsnet_crop ) normalized_row["earsnet_crop_stethoscope_x"] = norm_earsnet_crop[4, 0] normalized_row["earsnet_crop_stethoscope_y"] = norm_earsnet_crop[4, 1] - # Conv (Affine) - if RTMPOSE_ENABLED and YOLOX_ENABLED and CONV_ENABLED: - source_pts = np.array( - [ - [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])] - for pos in [ - "left_shoulder", - "right_shoulder", - "left_hip", - "right_hip", - ] - ], - dtype=np.float32, - ) - stetho_pt = np.array( - [float(row["stethoscope_x"]), float(row["stethoscope_y"])] - ) - calc_x, calc_y = calc_position.calc_affine(source_pts, *stetho_pt) - row["conv_stethoscope_x"] = calc_x - row["conv_stethoscope_y"] = calc_y + # ============= (G) パイプライン (Conv/XGBoost/LightGBM) ============= + # YOLOX vs. SSD で結果保存先のキーが違うのでここで使う変数を決める + # RTMPose + YOLOX or SSD + if RTMPOSE_ENABLED: + # YOLOX + if YOLOX_ENABLED and pose_keypoints is not None: + # conv + if CONV_ENABLED: + start_conv = time.time() + sp = np.array( + [ + [row["left_shoulder_x"], row["left_shoulder_y"]], + [row["right_shoulder_x"], row["right_shoulder_y"]], + [row["left_hip_x"], row["left_hip_y"]], + [row["right_hip_x"], row["right_hip_y"]], + ], + dtype=np.float32, + ) + stp = np.array([row["stethoscope_x"], row["stethoscope_y"]]) + _ = calc_position.calc_affine(sp, *stp) + end_conv = time.time() + conv_time = end_conv - start_conv + timings["conv_single"].append(conv_time) + timings["pipeline_rtmpose_yolox_conv"].append( + rtmpose_time + detection_time + conv_time + ) - # XGBoost - if RTMPOSE_ENABLED and YOLOX_ENABLED and XGBOOST_ENABLED: - if NORMALIZE_ENABLED: - input_data_xg = pd.DataFrame([normalized_row]) - else: - input_data_xg = pd.DataFrame([row]) - X_scaled_x = xg_scaler_x.transform(input_data_xg[input_columns]) - x_pred = xg_model_x.predict(X_scaled_x)[0] - X_scaled_y = xg_scaler_y.transform(input_data_xg[input_columns]) - y_pred = xg_model_y.predict(X_scaled_y)[0] - row["Xgboost_stethoscope_x"] = x_pred - row["Xgboost_stethoscope_y"] = y_pred + # XGBoost + if XGBOOST_ENABLED: + xg_start = time.time() + if NORMALIZE_ENABLED: + input_data_xg = pd.DataFrame([normalized_row]) + else: + input_data_xg = pd.DataFrame([row]) + X_scaled_x = xg_scaler_x.transform(input_data_xg[input_columns]) + _ = xg_model_x.predict(X_scaled_x)[0] + X_scaled_y = xg_scaler_y.transform(input_data_xg[input_columns]) + _ = xg_model_y.predict(X_scaled_y)[0] + xg_end = time.time() + xg_time = xg_end - xg_start + timings["xgboost_single"].append(xg_time) + timings["pipeline_rtmpose_yolox_xgboost"].append( + rtmpose_time + detection_time + xg_time + ) - # LightGBM - if RTMPOSE_ENABLED and YOLOX_ENABLED and LIGHTGBM_ENABLED: - if NORMALIZE_ENABLED: - input_data_lgb = pd.DataFrame([normalized_row]) - else: - input_data_lgb = pd.DataFrame([row]) - X_scaled_x = lgb_scaler_x.transform(input_data_lgb[input_columns]) - lgb_x_pred = lgb_model_x.predict(X_scaled_x)[0] - X_scaled_y = lgb_scaler_y.transform(input_data_lgb[input_columns]) - lgb_y_pred = lgb_model_y.predict(X_scaled_y)[0] - row["lightGBM_stethoscope_x"] = lgb_x_pred - row["lightGBM_stethoscope_y"] = lgb_y_pred + # LightGBM + if LIGHTGBM_ENABLED: + lgb_start = time.time() + if NORMALIZE_ENABLED: + input_data_lgb = pd.DataFrame([normalized_row]) + else: + input_data_lgb = pd.DataFrame([row]) + X_scaled_x = lgb_scaler_x.transform(input_data_lgb[input_columns]) + _ = lgb_model_x.predict(X_scaled_x)[0] + X_scaled_y = lgb_scaler_y.transform(input_data_lgb[input_columns]) + _ = lgb_model_y.predict(X_scaled_y)[0] + lgb_end = time.time() + lgb_time = lgb_end - lgb_start + timings["lightgbm_single"].append(lgb_time) + timings["pipeline_rtmpose_yolox_lightgbm"].append( + rtmpose_time + detection_time + lgb_time + ) + + # SSD + elif MOBILENETV1SSD_ENABLED: + # conv + if CONV_ENABLED: + start_conv = time.time() + sp = np.array( + [ + [row["left_shoulder_x"], row["left_shoulder_y"]], + [row["right_shoulder_x"], row["right_shoulder_y"]], + [row["left_hip_x"], row["left_hip_y"]], + [row["right_hip_x"], row["right_hip_y"]], + ], + dtype=np.float32, + ) + stp = np.array([row["stethoscope_x"], row["stethoscope_y"]]) + _ = calc_position.calc_affine(sp, *stp) + end_conv = time.time() + conv_time = end_conv - start_conv + timings["conv_single"].append(conv_time) + timings["pipeline_rtmpose_ssd_conv"].append( + rtmpose_time + detection_time + conv_time + ) + + # XGBoost + if XGBOOST_ENABLED: + xg_start = time.time() + if NORMALIZE_ENABLED: + input_data_xg = pd.DataFrame([normalized_row]) + else: + input_data_xg = pd.DataFrame([row]) + X_scaled_x = xg_scaler_x.transform(input_data_xg[input_columns]) + _ = xg_model_x.predict(X_scaled_x)[0] + X_scaled_y = xg_scaler_y.transform(input_data_xg[input_columns]) + _ = xg_model_y.predict(X_scaled_y)[0] + xg_end = time.time() + xg_time = xg_end - xg_start + timings["xgboost_single"].append(xg_time) + timings["pipeline_rtmpose_ssd_xgboost"].append( + rtmpose_time + detection_time + xg_time + ) + + # LightGBM + if LIGHTGBM_ENABLED: + lgb_start = time.time() + if NORMALIZE_ENABLED: + input_data_lgb = pd.DataFrame([normalized_row]) + else: + input_data_lgb = pd.DataFrame([row]) + X_scaled_x = lgb_scaler_x.transform(input_data_lgb[input_columns]) + _ = lgb_model_x.predict(X_scaled_x)[0] + X_scaled_y = lgb_scaler_y.transform(input_data_lgb[input_columns]) + _ = lgb_model_y.predict(X_scaled_y)[0] + lgb_end = time.time() + lgb_time = lgb_end - lgb_start + timings["lightgbm_single"].append(lgb_time) + timings["pipeline_rtmpose_ssd_lightgbm"].append( + rtmpose_time + detection_time + lgb_time + ) rows.append(row) + # 正規化後データ normalized_rows.append(normalized_row) processed_frames += 1 - # CSV書き込み + # ============= CSV書き込み ============= if rows: fieldnames = list(rows[0].keys()) csvfile_path = os.path.join(results_dir, "results.csv") @@ -805,11 +895,12 @@ print(f"Processed and saved results to: {csvfile_path}") print(f"Processed and saved normalized results to: {normfile_path}") + # 可視化動画化 generate_visualizations(csvfile_path, base_dir, results_dir) else: print("No data to write to CSV.") - # FPS計測結果をCSV保存 + # ============= FPS計測結果を CSV保存 ============= fps_data = [] for method_name, time_list in timings.items(): if not time_list: @@ -930,7 +1021,6 @@ stetho_color_rgb = (19, 80, 27) def draw_glow_marker(draw, center, main_color, radius=5): - # 光彩用外枠を白などにして少し大きめ outer_radius = radius + 3 x, y = int(center[0]), int(center[1]) # 白枠 @@ -951,6 +1041,8 @@ # 1) marked_images(肩/腰/聴診器) pil_marked = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) draw_marked = ImageDraw.Draw(pil_marked) + + # 肩・腰・聴診器 for point in [ "left_shoulder", "right_shoulder", @@ -967,7 +1059,6 @@ and not pd.isna(row[col_y]) ): x, y = int(row[col_x]), int(row[col_y]) - # 既存デフォルト色 (255,255,0) → 小さいマーカー draw_glow_marker( draw_marked, (x, y), main_color=(255, 255, 0), radius=5 ) @@ -977,7 +1068,7 @@ marked_dir = os.path.join(results_dir, "marked_images") cv2.imwrite(os.path.join(marked_dir, row["image_file_name"]), marked_bgr) - # 2) marked_pose_images(姿勢だけ) + # 2) marked_pose_images(姿勢のみ) pil_pose = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) draw_pose = ImageDraw.Draw(pil_pose) @@ -991,7 +1082,6 @@ and not pd.isna(row[col_y]) ): x, y = int(row[col_x]), int(row[col_y]) - # 光彩付き, main_color = (33,95,154) draw_glow_marker( draw_pose, (x, y), main_color=pose_color_rgb, radius=15 ) @@ -1001,7 +1091,7 @@ pose_dir_path = os.path.join(results_dir, pose_only_dir) cv2.imwrite(os.path.join(pose_dir_path, row["image_file_name"]), pose_bgr) - # 3) marked_stethoscope_images(聴診器だけ) + # 3) marked_stethoscope_images(聴診器のみ) pil_stetho = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) draw_stetho = ImageDraw.Draw(pil_stetho) @@ -1012,7 +1102,7 @@ and not pd.isna(row["stethoscope_y"]) ): sx, sy = int(row["stethoscope_x"]), int(row["stethoscope_y"]) - if sx != 0 or sy != 0: # 0なら未検出とみなす + if sx != 0 or sy != 0: draw_glow_marker( draw_stetho, (sx, sy), main_color=stetho_color_rgb, radius=15 ) @@ -1022,7 +1112,7 @@ stetho_dir_path = os.path.join(results_dir, stetho_only_dir) cv2.imwrite(os.path.join(stetho_dir_path, row["image_file_name"]), stetho_bgr) - # 4) combined系(従来) + # 4) combined系 combined_image_with_traj_rgb = body_np_rgb.copy() combined_image_without_traj_rgb = body_np_rgb.copy() @@ -1044,7 +1134,7 @@ color = colors[key] if key in colors else (0, 0, 255) - # (A) 個別 with trajectory + # 個別 with trajectory indiv_with_traj_rgb = body_np_rgb.copy() pil_indiv_with = Image.fromarray(indiv_with_traj_rgb) draw_indiv_with = ImageDraw.Draw(pil_indiv_with) @@ -1060,7 +1150,7 @@ ) cv2.imwrite(out_path_with, indiv_with_traj_bgr) - # (B) 個別 without trajectory + # 個別 without trajectory indiv_without_traj_rgb = body_np_rgb.copy() pil_indiv_without = Image.fromarray(indiv_without_traj_rgb) draw_indiv_without = ImageDraw.Draw(pil_indiv_without) @@ -1075,12 +1165,12 @@ ) cv2.imwrite(out_path_without, indiv_without_traj_bgr) - # (C) combined with trajectory + # combined with trajectory if len(points[key]) > 1: pillow_draw_polyline(draw_with_traj, points[key], color=color, width=2) draw_glow_marker(draw_with_traj, (x, y), main_color=color, radius=8) - # (D) combined without trajectory + # combined without trajectory draw_glow_marker(draw_without_traj, (x, y), main_color=color, radius=8) cwt_np = np.array(pil_with_traj) @@ -1095,13 +1185,12 @@ os.makedirs(cwod, exist_ok=True) cv2.imwrite(os.path.join(cwod, row["image_file_name"]), cwo_bgr) - # (2) 動画化 + # 動画化 create_video_from_images( os.path.join(results_dir, "marked_images"), os.path.join(results_dir, "marked_video.mp4"), ) - # 新規の姿勢と聴診器だけの動画 create_video_from_images( os.path.join(results_dir, pose_only_dir), os.path.join(results_dir, "pose_video.mp4"), @@ -1171,6 +1260,7 @@ help="Directory to save output images and results", ) + # RTMpose用 det_config = "modules/rtmpose/mmdetection_cfg/rtmdet_m_640-8xb32_coco-person.py" det_checkpoint = ( "models/rtmpose/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth"