diff --git a/config.py b/config.py index 4c46e69..8a19625 100644 --- a/config.py +++ b/config.py @@ -4,7 +4,7 @@ DEVICE = "cuda:0" # Colors for different models (B,G,R format) -CONV_COLOR = (0, 255, 0) # 純粋な緑 +CONV_COLOR = (27, 80, 19) # 純粋な緑 XGBOOST_COLOR = (0, 165, 255) # オレンジ LIGHTGBM_COLOR = (255, 0, 255) # マゼンタ EARSNET_COLOR = (139, 0, 0) # ダークブルー diff --git a/main.py b/main.py index 81ed270..b849b37 100644 --- a/main.py +++ b/main.py @@ -19,6 +19,9 @@ from mmpose.structures import merge_data_samples from mmpose.utils import adapt_mmdet_pipeline +# Pillow +from PIL import Image, ImageDraw + import config # EARSNet @@ -132,22 +135,145 @@ return None -def draw_polygon_and_detection(image, polygon_vertices, stethoscope_x, stethoscope_y): - overlay = image.copy() - vertices = polygon_vertices.astype(np.int32) - cv2.polylines(overlay, [vertices], True, (0, 255, 0), 2) +############################################################################### +# Pillow-based drawing helpers +############################################################################### +def pillow_draw_circle(draw, center, radius, fill=None, outline=None, width=1): + """ + Draw a circle (via ellipse) on a Pillow draw context. + center: (x, y) + radius: int + fill, outline: color tuples (R, G, B) + width: outline thickness if fill=None + """ + x, y = int(center[0]), int(center[1]) + left_up = (x - radius, y - radius) + right_down = (x + radius, y + radius) + if fill is not None: + draw.ellipse([left_up, right_down], fill=fill, outline=outline, width=width) + else: + draw.ellipse([left_up, right_down], outline=outline, width=width) + + +def pillow_draw_polygon(draw, vertices, outline=(0, 255, 0), width=2): + """ + Draw a polygon (as connected lines) in Pillow. + vertices: list of (x, y) + """ + int_vertices = [(int(v[0]), int(v[1])) for v in vertices] + if len(int_vertices) > 1: + for i in range(len(int_vertices)): + j = (i + 1) % len(int_vertices) + draw.line([int_vertices[i], int_vertices[j]], fill=outline, width=width) + + +def pillow_draw_polyline(draw, points, color=(255, 0, 0), width=2): + """ + Draw connected lines for a list of points (like opencv polylines). + """ + if len(points) < 2: + return + + int_points = [(int(p[0]), int(p[1])) for p in points] + for i in range(len(int_points) - 1): + draw.line([int_points[i], int_points[i + 1]], fill=color, width=width) + + +def draw_polygon_and_detection_pillow( + image, polygon_vertices, stethoscope_x, stethoscope_y +): + """ + Draw polygon & stethoscope location with Pillow, then return BGR np.array. + """ + # Convert to Pillow (BGR -> RGB) + pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_img) + + # polygon_vertices → [(x, y), ...] (int) + vertices = [(int(v[0]), int(v[1])) for v in polygon_vertices] + pillow_draw_polygon(draw, vertices, outline=(0, 255, 0), width=2) if stethoscope_x is not None and stethoscope_y is not None: - center = (int(stethoscope_x), int(stethoscope_y)) - cv2.circle(overlay, center, 10, (255, 0, 0), -1) - cv2.circle(overlay, center, 12, (255, 255, 255), 2) - return overlay + x, y = int(stethoscope_x), int(stethoscope_y) + # Inner circle + pillow_draw_circle(draw, (x, y), 10, fill=(255, 0, 0)) + # Outer circle + pillow_draw_circle( + draw, (x, y), 12, fill=None, outline=(255, 255, 255), width=2 + ) + + # Convert back to BGR + out_img_rgb = np.array(pil_img) + out_img_bgr = cv2.cvtColor(out_img_rgb, cv2.COLOR_RGB2BGR) + return out_img_bgr + + +def yolox_detector_inference(frame, yolox_inferencer, pose_keypoints, score_thr=0.3): + """YOLOXで聴診器を検出し、ポリゴン内部にある聴診器の中心座標を返す。""" + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + result = yolox_inferencer(inputs=frame_rgb, return_vis=True) + predictions = result["predictions"][0] + stethoscope_x = None + stethoscope_y = None + max_score = -1 + + nose = pose_keypoints[0] + left_shoulder = pose_keypoints[5] + right_shoulder = pose_keypoints[6] + left_hip = pose_keypoints[11] + right_hip = pose_keypoints[12] + + expanded_left_shoulder, expanded_right_shoulder = expand_points( + left_shoulder, right_shoulder + ) + expanded_left_hip, expanded_right_hip = expand_points(left_hip, right_hip) + + polygon_vertices = np.array( + [ + nose, + expanded_left_shoulder, + expanded_left_hip, + expanded_right_hip, + expanded_right_shoulder, + ] + ) + + for i, (label, score) in enumerate( + zip(predictions["labels"], predictions["scores"]) + ): + if score >= score_thr and label == 0: + bbox = predictions["bboxes"][i] + center_x = (bbox[0] + bbox[2]) / 2 + center_y = (bbox[1] + bbox[3]) / 2 + + if point_in_polygon([center_x, center_y], polygon_vertices): + if score > max_score: + stethoscope_x = center_x + stethoscope_y = center_y + max_score = score + + if stethoscope_x is None or stethoscope_y is None: + stethoscope_x = 0 + stethoscope_y = 0 + + # YOLOX の可視化出力 (RGB) + stethoscope_overlay_img = result["visualization"][0] + if ( + len(stethoscope_overlay_img.shape) == 3 + and stethoscope_overlay_img.shape[2] == 3 + ): + stethoscope_overlay_img = cv2.cvtColor( + stethoscope_overlay_img, cv2.COLOR_RGB2BGR + ) + + # Pillow描画 + stethoscope_overlay_img = draw_polygon_and_detection_pillow( + stethoscope_overlay_img, polygon_vertices, stethoscope_x, stethoscope_y + ) + return stethoscope_overlay_img, stethoscope_x, stethoscope_y def expand_points(p1, p2): - """ - 2点を中央から外側に拡張(肩や腰の領域を拡大する用途)するヘルパー関数 - """ mid_x = (p1[0] + p2[0]) / 2 mid_y = (p1[1] + p2[1]) / 2 @@ -179,78 +305,10 @@ return inside -def yolox_detector_inference(frame, yolox_inferencer, pose_keypoints, score_thr=0.3): - """YOLOXで聴診器を検出し、ポリゴン内部にある聴診器の中心座標を返す。""" - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - result = yolox_inferencer(inputs=frame_rgb, return_vis=True) - predictions = result["predictions"][0] - stethoscope_x = None - stethoscope_y = None - max_score = -1 - - # keypoints 配列から部位を取得 (COCOフォーマット想定) - nose = pose_keypoints[0] - left_shoulder = pose_keypoints[5] - right_shoulder = pose_keypoints[6] - left_hip = pose_keypoints[11] - right_hip = pose_keypoints[12] - - # 肩と腰を大きめに外に広げる - expanded_left_shoulder, expanded_right_shoulder = expand_points( - left_shoulder, right_shoulder - ) - expanded_left_hip, expanded_right_hip = expand_points(left_hip, right_hip) - - polygon_vertices = np.array( - [ - nose, - expanded_left_shoulder, - expanded_left_hip, - expanded_right_hip, - expanded_right_shoulder, - ] - ) - - for i, (label, score) in enumerate( - zip(predictions["labels"], predictions["scores"]) - ): - # label=0 → 聴診器と仮定 (学習済みクラスのラベルに合わせる) - if score >= score_thr and label == 0: - bbox = predictions["bboxes"][i] - center_x = (bbox[0] + bbox[2]) / 2 - center_y = (bbox[1] + bbox[3]) / 2 - - if point_in_polygon([center_x, center_y], polygon_vertices): - if score > max_score: - stethoscope_x = center_x - stethoscope_y = center_y - max_score = score - - if stethoscope_x is None or stethoscope_y is None: - stethoscope_x = 0 - stethoscope_y = 0 - - stethoscope_overlay_img = result["visualization"][0] - if ( - len(stethoscope_overlay_img.shape) == 3 - and stethoscope_overlay_img.shape[2] == 3 - ): - stethoscope_overlay_img = cv2.cvtColor( - stethoscope_overlay_img, cv2.COLOR_RGB2BGR - ) - - stethoscope_overlay_img = draw_polygon_and_detection( - stethoscope_overlay_img, polygon_vertices, stethoscope_x, stethoscope_y - ) - - return stethoscope_overlay_img, stethoscope_x, stethoscope_y - - ############################################################################### # 各種座標変換 ############################################################################### def normalize_quadrilateral_with_point(points, extra_point): - """4点(肩・肩・腰・腰)と任意の1点(聴診器)を正規化して返す。""" all_points = np.vstack([points.reshape(-1, 2), extra_point]) center = np.mean(points.reshape(-1, 2), axis=0) centered_points = all_points - center @@ -273,7 +331,7 @@ ) ) if max_edge_length == 0: - return rotated_points # 0割り防止 + return rotated_points return rotated_points / max_edge_length @@ -331,16 +389,7 @@ # 胴体クロップ生成 ############################################################################### def crop_body_from_keypoints(frame, left_shoulder, right_shoulder, left_hip, right_hip): - """ - RTMPOSE 等で推定された肩・腰をもとに胴体をざっくり囲むバウンディングボックスを計算し、 - そこをクロップして返す。 - 戻り値: (cropped_frame, (xmin, ymin)) - cropped_frame: クロップ後の画像 (np.ndarray) - (xmin, ymin): クロップ領域の左上座標 (元画像座標系へのマッピング用) - """ h, w, _ = frame.shape - - # 左右肩・左右腰 4点の x, y xs = [left_shoulder[0], right_shoulder[0], left_hip[0], right_hip[0]] ys = [left_shoulder[1], right_shoulder[1], left_hip[1], right_hip[1]] @@ -349,7 +398,6 @@ ymin = int(min(ys)) ymax = int(max(ys)) - # 多少のマージンを足す (上下左右に 20 ピクセルなど) margin = 20 xmin = max(0, xmin - margin) xmax = min(w, xmax + margin) @@ -357,7 +405,6 @@ ymax = min(h, ymax + margin) cropped_frame = frame[ymin:ymax, xmin:xmax].copy() - return cropped_frame, (xmin, ymin) @@ -376,10 +423,8 @@ pose_overlay_dir = os.path.join(results_dir, "pose_overlay_image") stethoscope_overlay_dir = os.path.join(results_dir, "stethoscope_overlay_image") - # クロップ画像を保存するディレクトリを作成 cropped_dir = os.path.join(results_dir, "cropped_images") os.makedirs(cropped_dir, exist_ok=True) - os.makedirs(results_dir, exist_ok=True) os.makedirs(pose_overlay_dir, exist_ok=True) os.makedirs(stethoscope_overlay_dir, exist_ok=True) @@ -393,18 +438,11 @@ rows = [] normalized_rows = [] - # ------------------------------------------ - # YOLOX 初期化 - # ------------------------------------------ yolox_inferencer = None if YOLOX_ENABLED: yolox_inferencer = init_yolox() - # ------------------------------------------ - # 時間計測用 dict - # ------------------------------------------ timings = { - # 単体推論 "rtmpose_single": [], "yolox_single": [], "conv_single": [], @@ -412,18 +450,14 @@ "xgboost_single": [], "earsnet_single": [], "earsnet_cropped_single": [], - # パイプライン推論 "pipeline_rtmpose_yolox_conv": [], "pipeline_rtmpose_yolox_lightgbm": [], "pipeline_rtmpose_yolox_xgboost": [], - # 今回修正 - "pipeline_earsnet": [], # EARSNet 単体 - "pipeline_earsnet_cropped": [], # RTMPose + EARSNet(クロップ) + "pipeline_earsnet": [], + "pipeline_earsnet_cropped": [], } - # ------------------------------------------ - # 各モデルの事前ロード - # ------------------------------------------ + # モデルロード if LIGHTGBM_ENABLED: lgb_model_x = load_model("./models/LightGBM/stethoscope_calc_x_best_model.pkl") lgb_model_y = load_model("./models/LightGBM/stethoscope_calc_y_best_model.pkl") @@ -452,7 +486,6 @@ "./models/NGBoost/stethoscope_calc_y_best_model.pkl" ) - # 通常 EARSNet (クロップなし) if EARSNET_ENABLED: earsnet_predictor = EARSNetPredictor( weight_path="models/EARSNet/best_model.pth", @@ -461,16 +494,14 @@ device=DEVICE, ) - # クロップ画像用 EARSNet (別モデル) if EARSNET_CROP_ENABLED: earsnet_cropped_predictor = EARSNetPredictor( - weight_path="models/EARSNet/crop/best_model.pth", # 想定モデルファイル + weight_path="models/EARSNet/crop/best_model.pth", resnet_depth="18", pretrained=True, device=DEVICE, ) - # CSVで使用する列 input_columns = [ "left_shoulder_x", "left_shoulder_y", @@ -484,9 +515,7 @@ "stethoscope_y", ] - # ------------------------------------------------------------ - # メインループ(フレームごとに処理) - # ------------------------------------------------------------ + # メインループ for image_file_name in png_files: image_path = os.path.join(base_dir, image_file_name) frame = cv2.imread(image_path) @@ -494,11 +523,9 @@ print(f"Failed to load image: {image_path}") continue - # (A) RTMPose rtmpose_time = 0.0 if RTMPOSE_ENABLED: start_time_rtmpose = time.time() - # ===== RTMpose推論 ===== frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) det_result = inference_detector(detector, frame_rgb) pred_instance = det_result.pred_instances.cpu().numpy() @@ -506,7 +533,6 @@ bboxes = np.concatenate( (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1 ) - # 人物のみ (label=0想定) bboxes = bboxes[ np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3) ] @@ -539,44 +565,39 @@ wait_time=0, kpt_thr=0.3, ) - pose_overlay_img = visualizer.get_image() # (RGB) + pose_overlay_img = visualizer.get_image() pose_overlay_bgr = cv2.cvtColor(pose_overlay_img, cv2.COLOR_RGB2BGR) cv2.imwrite( os.path.join(pose_overlay_dir, image_file_name), pose_overlay_bgr ) - # COCOフォーマットのキーポイントを取り出す left_shoulder = (pose_keypoints[5][0], pose_keypoints[5][1]) right_shoulder = (pose_keypoints[6][0], pose_keypoints[6][1]) left_hip = (pose_keypoints[11][0], pose_keypoints[11][1]) right_hip = (pose_keypoints[12][0], pose_keypoints[12][1]) elif POSENET_ENABLED: - # 既存 PoseNet start_time_rtmpose = time.time() pose_overlay_img, *landmarks = ears_ai.pose_detect(frame, None) end_time_rtmpose = time.time() rtmpose_time = end_time_rtmpose - start_time_rtmpose timings["rtmpose_single"].append(rtmpose_time) - # landmarks = [left_shoulder, right_shoulder, left_hip, right_hip] left_shoulder = landmarks[0] right_shoulder = landmarks[1] left_hip = landmarks[2] right_hip = landmarks[3] - # pose_overlay_img はすでに BGR 形式想定 cv2.imwrite( os.path.join(pose_overlay_dir, image_file_name), pose_overlay_img ) else: - # RTMPose/PoseNet どちらも有効でない場合 left_shoulder = (0, 0) right_shoulder = (0, 0) left_hip = (0, 0) right_hip = (0, 0) - # (B) YOLOX (必要なら) + # YOLOX yolox_time = 0.0 stethoscope_x, stethoscope_y = 0, 0 if YOLOX_ENABLED: @@ -593,14 +614,12 @@ yolox_time = end_time_yolox - start_time_yolox timings["yolox_single"].append(yolox_time) - # 可視化 cv2.imwrite( os.path.join(stethoscope_overlay_dir, image_file_name), stethoscope_overlay_img, ) elif POSENET_ENABLED: - # PoseNet 用のキー配列に変換してYOLOX pose_keypoints_pose_net = [[0, 0]] * 13 pose_keypoints_pose_net[5] = (left_shoulder[0], left_shoulder[1]) pose_keypoints_pose_net[6] = (right_shoulder[0], right_shoulder[1]) @@ -617,16 +636,13 @@ yolox_time = end_time_yolox - start_time_yolox timings["yolox_single"].append(yolox_time) - # 可視化 cv2.imwrite( os.path.join(stethoscope_overlay_dir, image_file_name), stethoscope_overlay_img, ) - # ここで、(RTMPose + YOLOX) の合計検出時間をパイプラインに使う場合あり detection_time_rtmpose_yolox = rtmpose_time + yolox_time - # CSV用に座標をまとめる row = { "image_file_name": image_file_name, "left_shoulder_x": left_shoulder[0], @@ -641,54 +657,39 @@ "stethoscope_y": stethoscope_y, } - # (C) EARSNet 単体 - # -> pipeline_earsnet は RTMPose, YOLOX を含まない if EARSNET_ENABLED: start_time_earsnet = time.time() earsnet_x, earsnet_y = earsnet_predictor.predict(image_path) end_time_earsnet = time.time() - earsnet_time = end_time_earsnet - start_time_earsnet timings["earsnet_single"].append(earsnet_time) - - # pipeline_earsnet = earsnet単体時間 timings["pipeline_earsnet"].append(earsnet_time) row["earsnet_stethoscope_x"] = earsnet_x row["earsnet_stethoscope_y"] = earsnet_y - # (D) クロップ画像 EARSNet ( RTMPose + EARSNet_Cropped ) if EARSNET_CROP_ENABLED: - # 1) クロップ生成 cropped_img, (crop_xmin, crop_ymin) = crop_body_from_keypoints( frame, left_shoulder, right_shoulder, left_hip, right_hip ) - # クロップ画像を保存(確認用) cropped_filename = os.path.splitext(image_file_name)[0] + "_cropped.png" cv2.imwrite(os.path.join(cropped_dir, cropped_filename), cropped_img) - # 2) EARSNet (クロップ版) start_time_earsnet_cropped = time.time() earsnet_cropped_x, earsnet_cropped_y = earsnet_cropped_predictor.predict( os.path.join(cropped_dir, cropped_filename) ) end_time_earsnet_cropped = time.time() - earsnet_cropped_time = end_time_earsnet_cropped - start_time_earsnet_cropped timings["earsnet_cropped_single"].append(earsnet_cropped_time) - # pipeline_earsnet_cropped = RTMPose時間 + EARSNet(クロップ) pipeline_earsnet_cropped_time = rtmpose_time + earsnet_cropped_time timings["pipeline_earsnet_cropped"].append(pipeline_earsnet_cropped_time) - # 3) 座標を元画像に変換 - global_x = earsnet_cropped_x - global_y = earsnet_cropped_y + row["earsnet_crop_stethoscope_x"] = earsnet_cropped_x + row["earsnet_crop_stethoscope_y"] = earsnet_cropped_y - row["earsnet_crop_stethoscope_x"] = global_x - row["earsnet_crop_stethoscope_y"] = global_y - - # (E) 正規化 + # 正規化 source_points = np.array( [ [float(row["left_shoulder_x"]), float(row["left_shoulder_y"])], @@ -698,14 +699,12 @@ ], dtype=np.float32, ) - stethoscope_point = np.array( [float(row["stethoscope_x"]), float(row["stethoscope_y"])] ) normalized_points = normalize_quadrilateral_with_point( source_points.flatten(), stethoscope_point ) - normalized_row = { "image_file_name": image_file_name, "left_shoulder_x": normalized_points[0, 0], @@ -749,11 +748,8 @@ rows.append(row) normalized_rows.append(normalized_row) - # (F) パイプライン (RTMPose+YOLOX → Conv/LightGBM/XGBoost) - # ここは従来通り: detection_time_rtmpose_yolox + 各モデル時間 - + # パイプライン if RTMPOSE_ENABLED and YOLOX_ENABLED: - # conv if CONV_ENABLED: start_conv = time.time() source_pts = np.array( @@ -775,13 +771,10 @@ end_conv = time.time() conv_time = end_conv - start_conv timings["conv_single"].append(conv_time) - - # pipeline_rtmpose_yolox_conv timings["pipeline_rtmpose_yolox_conv"].append( detection_time_rtmpose_yolox + conv_time ) - # XGBoost if XGBOOST_ENABLED: xg_start = time.time() if NORMALIZE_ENABLED: @@ -795,12 +788,10 @@ xg_end = time.time() xg_time = xg_end - xg_start timings["xgboost_single"].append(xg_time) - timings["pipeline_rtmpose_yolox_xgboost"].append( detection_time_rtmpose_yolox + xg_time ) - # LightGBM if LIGHTGBM_ENABLED: lgb_start = time.time() if NORMALIZE_ENABLED: @@ -814,21 +805,17 @@ lgb_end = time.time() lgb_time = lgb_end - lgb_start timings["lightgbm_single"].append(lgb_time) - timings["pipeline_rtmpose_yolox_lightgbm"].append( detection_time_rtmpose_yolox + lgb_time ) processed_frames += 1 - # ======================================================================== # CSV 書き込み - # ======================================================================== if rows: fieldnames = list(rows[0].keys()) csvfile_path = os.path.join(results_dir, "results.csv") normfile_path = os.path.join(results_dir, "results-convert.csv") - os.makedirs(results_dir, exist_ok=True) with ( @@ -849,14 +836,11 @@ print(f"Processed and saved results to: {csvfile_path}") print(f"Processed and saved normalized results to: {normfile_path}") - # 可視化・動画化 generate_visualizations(csvfile_path, base_dir, results_dir) else: print("No data to write to CSV.") - # ======================================================================== - # FPS計算 & CSV保存 (サブコンポーネント&パイプラインごとの合計/平均) - # ======================================================================== + # FPS計測 fps_data = [] for method_name, time_list in timings.items(): if not time_list: @@ -900,17 +884,25 @@ ############################################################################### -# 可視化・動画化 +# 可視化・動画化(Body画像への描画も Pillow で行う) ############################################################################### def generate_visualizations(csv_path, original_images_dir, results_dir): """ - CSVに書き込んだ推定結果を用い、BodyF.pngへの描画や動画化を行う。 - EARSNetクロップ版の結果も描画できるように調整。 + CSVに書き込んだ推定結果を用い、BodyF.png(or BodyB.png)への描画や動画化を行う。 + すべての描画をPillowで実装。最終的にcv2.imwrite()で保存するのでRGB→BGR変換が必要。 """ df = pd.read_csv(csv_path) - body_image = cv2.imread("./images/body/BodyF.png") - # 生成ディレクトリ設定 + body_image_path = "./images/body/BodyF.png" + if not os.path.exists(body_image_path): + print(f"Warning: {body_image_path} not found.") + return + + # Pillow (RGB) + body_img_pil = Image.open(body_image_path).convert("RGB") + # np.array()すると「RGB順」のまま入る + body_np_rgb = np.array(body_img_pil) + dirs = {"marked": "marked_images"} if CONV_ENABLED: dirs["conv"] = "conv" @@ -940,10 +932,8 @@ exist_ok=True, ) - # 描画に使う座標列 points = {key: [] for key in dirs.keys() if key not in ["marked", "combined"]} - # 色設定 colors = { "conv": CONV_COLOR, "Xgboost": XGBOOST_COLOR, @@ -951,7 +941,7 @@ "catboost": CATBOOST_COLOR, "ngboost": NGBOOST_COLOR, "earsnet": EARSNET_COLOR, - "earsnet_crop": (255, 51, 255), # ピンク系 + "earsnet_crop": (255, 51, 255), } for _, row in df.iterrows(): @@ -962,7 +952,10 @@ if original_image is None: continue - # 肩・腰・聴診器などをマーキング + # --- 1) マーキング --- + pil_marked = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) + draw_marked = ImageDraw.Draw(pil_marked) + for point in [ "left_shoulder", "right_shoulder", @@ -977,25 +970,25 @@ val_y = row[col_y] if pd.isna(val_x) or pd.isna(val_y): continue - cv2.circle( - original_image, - (int(val_x), int(val_y)), - 10, - (255, 255, 0), - -1, - ) - # 保存 + x, y = int(val_x), int(val_y) + pillow_draw_circle(draw_marked, (x, y), 5, fill=(255, 255, 0)) + + # Pillow(RGB) → BGR + marked_rgb = np.array(pil_marked) + marked_bgr = cv2.cvtColor(marked_rgb, cv2.COLOR_RGB2BGR) marked_dir = os.path.join(results_dir, "marked_images") - cv2.imwrite( - os.path.join(marked_dir, row["image_file_name"]), - original_image, - ) + cv2.imwrite(os.path.join(marked_dir, row["image_file_name"]), marked_bgr) - # BodyF.png に軌跡を描画 - combined_image_with_traj = body_image.copy() - combined_image_without_traj = body_image.copy() + # --- 2) BodyF.pngに軌跡描画 --- + # body_np_rgbをコピー (RGB配列) + combined_image_with_traj_rgb = body_np_rgb.copy() + combined_image_without_traj_rgb = body_np_rgb.copy() - # 各推定結果を描画 + pil_with_traj = Image.fromarray(combined_image_with_traj_rgb) + pil_without_traj = Image.fromarray(combined_image_without_traj_rgb) + draw_with_traj = ImageDraw.Draw(pil_with_traj) + draw_without_traj = ImageDraw.Draw(pil_without_traj) + for key in points.keys(): col_x = f"{key}_stethoscope_x" col_y = f"{key}_stethoscope_y" @@ -1012,73 +1005,55 @@ color = colors[key] if key in colors else (0, 0, 255) # 個別 with trajectory - image_with_trajectory = body_image.copy() + indiv_with_traj_rgb = body_np_rgb.copy() + pil_indiv_with = Image.fromarray(indiv_with_traj_rgb) + draw_indiv_with = ImageDraw.Draw(pil_indiv_with) + if len(points[key]) > 1: - cv2.polylines( - image_with_trajectory, - [np.array(points[key])], - False, - color, - 2, - ) - cv2.circle( - image_with_trajectory, - (x, y), - 10, - color, - -1, + pillow_draw_polyline(draw_indiv_with, points[key], color=color, width=2) + pillow_draw_circle(draw_indiv_with, (x, y), 10, fill=color) + + indiv_with_traj_rgb2 = np.array(pil_indiv_with) + # RGB -> BGR + indiv_with_traj_bgr = cv2.cvtColor(indiv_with_traj_rgb2, cv2.COLOR_RGB2BGR) + out_path_with = os.path.join( + results_dir, f"{dirs[key]}_with_trajectory", row["image_file_name"] ) - cv2.imwrite( - os.path.join( - results_dir, f"{dirs[key]}_with_trajectory", row["image_file_name"] - ), - image_with_trajectory, - ) + cv2.imwrite(out_path_with, indiv_with_traj_bgr) # 個別 without trajectory - image_without_trajectory = body_image.copy() - cv2.circle( - image_without_trajectory, - (x, y), - 10, - color, - -1, + indiv_without_traj_rgb = body_np_rgb.copy() + pil_indiv_without = Image.fromarray(indiv_without_traj_rgb) + draw_indiv_without = ImageDraw.Draw(pil_indiv_without) + pillow_draw_circle(draw_indiv_without, (x, y), 10, fill=color) + + indiv_without_traj_rgb2 = np.array(pil_indiv_without) + indiv_without_traj_bgr = cv2.cvtColor( + indiv_without_traj_rgb2, cv2.COLOR_RGB2BGR ) - cv2.imwrite( - os.path.join( - results_dir, - f"{dirs[key]}_without_trajectory", - row["image_file_name"], - ), - image_without_trajectory, + out_path_without = os.path.join( + results_dir, f"{dirs[key]}_without_trajectory", row["image_file_name"] ) + cv2.imwrite(out_path_without, indiv_without_traj_bgr) # combined with trajectory if len(points[key]) > 1: - cv2.polylines( - combined_image_with_traj, - [np.array(points[key])], - False, - color, - 2, - ) - cv2.circle( - combined_image_with_traj, - (x, y), - 10, - color, - -1, - ) - # combined without trajectory - cv2.circle( - combined_image_without_traj, - (x, y), - 10, - color, - -1, - ) + pillow_draw_polyline(draw_with_traj, points[key], color=color, width=2) + pillow_draw_circle(draw_with_traj, (x, y), 10, fill=color) - # まとめて保存 + # combined without trajectory + pillow_draw_circle(draw_without_traj, (x, y), 10, fill=color) + + # 結果 (pil_with_traj / pil_without_traj) を BGR に変換して保存 + combined_with_traj_rgb2 = np.array(pil_with_traj) # RGB + combined_without_traj_rgb2 = np.array(pil_without_traj) # RGB + combined_with_traj_bgr = cv2.cvtColor( + combined_with_traj_rgb2, cv2.COLOR_RGB2BGR + ) + combined_without_traj_bgr = cv2.cvtColor( + combined_without_traj_rgb2, cv2.COLOR_RGB2BGR + ) + os.makedirs( os.path.join(results_dir, "combined_with_trajectory"), exist_ok=True ) @@ -1090,13 +1065,13 @@ os.path.join( results_dir, "combined_with_trajectory", row["image_file_name"] ), - combined_image_with_traj, + combined_with_traj_bgr, ) cv2.imwrite( os.path.join( results_dir, "combined_without_trajectory", row["image_file_name"] ), - combined_image_without_traj, + combined_without_traj_bgr, ) # 動画化 @@ -1153,7 +1128,7 @@ parser = argparse.ArgumentParser(description="Process video and generate results.") parser.add_argument( "--video_path", - default="./video/Media1.mp4", + default="./video/tes.mp4", help="Path to the input video file", ) parser.add_argument( @@ -1162,7 +1137,6 @@ help="Directory to save output images and results", ) - # RTMpose 用の config & checkpoint (必要に応じて変更) det_config = "modules/rtmpose/mmdetection_cfg/rtmdet_m_640-8xb32_coco-person.py" det_checkpoint = ( "models/rtmpose/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth" @@ -1174,18 +1148,14 @@ pose_checkpoint = "models/rtmpose/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth" args = parser.parse_args() - os.makedirs(args.output_dir, exist_ok=True) - # 1) FPSモニタ用スレッド開始 fps_thread = Thread(target=fps_monitor, args=(1.0,), daemon=True) fps_thread.start() - # 2) 動画をフレームに分割 frames_dir = os.path.join(args.output_dir, "frames") video_to_frames(args.video_path, frames_dir) - # 3) RTMPOSE初期化 (必要なときのみ) if RTMPOSE_ENABLED: detector = init_detector(det_config, det_checkpoint, device=DEVICE) detector.cfg = adapt_mmdet_pipeline(detector.cfg) @@ -1201,7 +1171,6 @@ else: process_images(args, None, None, None) - # 4) スレッド終了指示・join global stop_fps_thread stop_fps_thread = True fps_thread.join()