diff --git a/main.py b/main.py index b849b37..69fe303 100644 --- a/main.py +++ b/main.py @@ -51,27 +51,21 @@ MobileNetV1SSD_ENABLED = config.MOBILENETV1SSD_ENABLED YOLOX_ENABLED = config.YOLOX_ENABLED EARSNET_ENABLED = config.EARSNET_ENABLED - -# ★ クロップ画像を使う EARSNet (別モデル) を使うかどうか EARSNET_CROP_ENABLED = config.EARSNET_CROP_ENABLED NORMALIZE_ENABLED = config.NORMALIZE_ENABLED - DEVICE = config.DEVICE # "cuda" or "cpu" など ############################################################################### -# リアルタイムFPS計測用のグローバル変数&スレッド定義 +# リアルタイムFPS計測用のグローバル変数&スレッド定義 (描画時間は含まない) ############################################################################### -processed_frames = 0 # 処理済みフレーム数(メインスレッドでインクリメント) +processed_frames = 0 # 推論処理が完了したフレーム数(メインスレッドでインクリメント) stop_fps_thread = False # スレッド終了フラグ fps_history = [] def fps_monitor(interval=1.0): - """ - 別スレッドとして起動し、一定時間おきに processed_frames を確認してリアルタイムFPSを計算する。 - interval=1.0 なら1秒ごとにFPSを出力。 - """ + """推論処理完了したフレーム数を定期的に見てFPSを算出する。描画時間は含まない。""" global processed_frames, stop_fps_thread, fps_history last_count = 0 @@ -93,7 +87,6 @@ print( f"[FPS Monitor] Real-time FPS: {current_fps:.2f} (frames: +{frames_delta})" ) - fps_history.append((now, current_fps)) last_count = current_count @@ -139,13 +132,6 @@ # Pillow-based drawing helpers ############################################################################### def pillow_draw_circle(draw, center, radius, fill=None, outline=None, width=1): - """ - Draw a circle (via ellipse) on a Pillow draw context. - center: (x, y) - radius: int - fill, outline: color tuples (R, G, B) - width: outline thickness if fill=None - """ x, y = int(center[0]), int(center[1]) left_up = (x - radius, y - radius) right_down = (x + radius, y + radius) @@ -156,10 +142,6 @@ def pillow_draw_polygon(draw, vertices, outline=(0, 255, 0), width=2): - """ - Draw a polygon (as connected lines) in Pillow. - vertices: list of (x, y) - """ int_vertices = [(int(v[0]), int(v[1])) for v in vertices] if len(int_vertices) > 1: for i in range(len(int_vertices)): @@ -168,12 +150,8 @@ def pillow_draw_polyline(draw, points, color=(255, 0, 0), width=2): - """ - Draw connected lines for a list of points (like opencv polylines). - """ if len(points) < 2: return - int_points = [(int(p[0]), int(p[1])) for p in points] for i in range(len(int_points) - 1): draw.line([int_points[i], int_points[i + 1]], fill=color, width=width) @@ -182,34 +160,26 @@ def draw_polygon_and_detection_pillow( image, polygon_vertices, stethoscope_x, stethoscope_y ): - """ - Draw polygon & stethoscope location with Pillow, then return BGR np.array. - """ - # Convert to Pillow (BGR -> RGB) + """Draw polygon & stethoscope location with Pillow, then return BGR np.array.""" pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) draw = ImageDraw.Draw(pil_img) - # polygon_vertices → [(x, y), ...] (int) vertices = [(int(v[0]), int(v[1])) for v in polygon_vertices] pillow_draw_polygon(draw, vertices, outline=(0, 255, 0), width=2) if stethoscope_x is not None and stethoscope_y is not None: x, y = int(stethoscope_x), int(stethoscope_y) - # Inner circle pillow_draw_circle(draw, (x, y), 10, fill=(255, 0, 0)) - # Outer circle pillow_draw_circle( draw, (x, y), 12, fill=None, outline=(255, 255, 255), width=2 ) - # Convert back to BGR out_img_rgb = np.array(pil_img) out_img_bgr = cv2.cvtColor(out_img_rgb, cv2.COLOR_RGB2BGR) return out_img_bgr def yolox_detector_inference(frame, yolox_inferencer, pose_keypoints, score_thr=0.3): - """YOLOXで聴診器を検出し、ポリゴン内部にある聴診器の中心座標を返す。""" frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) result = yolox_inferencer(inputs=frame_rgb, return_vis=True) predictions = result["predictions"][0] @@ -256,7 +226,6 @@ stethoscope_x = 0 stethoscope_y = 0 - # YOLOX の可視化出力 (RGB) stethoscope_overlay_img = result["visualization"][0] if ( len(stethoscope_overlay_img.shape) == 3 @@ -266,7 +235,6 @@ stethoscope_overlay_img, cv2.COLOR_RGB2BGR ) - # Pillow描画 stethoscope_overlay_img = draw_polygon_and_detection_pillow( stethoscope_overlay_img, polygon_vertices, stethoscope_x, stethoscope_y ) @@ -282,7 +250,6 @@ new_p1 = [mid_x + vec_x * 2, mid_y + vec_y * 2] new_p2 = [mid_x - vec_x * 2, mid_y - vec_y * 2] - return np.array(new_p1), np.array(new_p2) @@ -290,7 +257,6 @@ x, y = point n = len(vertices) inside = False - j = n - 1 for i in range(n): if (vertices[i][1] > y) != (vertices[j][1] > y): @@ -301,7 +267,6 @@ if x < intersect_x: inside = not inside j = i - return inside @@ -332,7 +297,6 @@ ) if max_edge_length == 0: return rotated_points - return rotated_points / max_edge_length @@ -409,7 +373,7 @@ ############################################################################### -# メイン処理 +# メイン処理 (推論 & 座標計算のみ) -> FPS計測対象 ############################################################################### def process_images(args, detector, pose_estimator, visualizer): global processed_frames @@ -424,10 +388,10 @@ stethoscope_overlay_dir = os.path.join(results_dir, "stethoscope_overlay_image") cropped_dir = os.path.join(results_dir, "cropped_images") - os.makedirs(cropped_dir, exist_ok=True) os.makedirs(results_dir, exist_ok=True) os.makedirs(pose_overlay_dir, exist_ok=True) os.makedirs(stethoscope_overlay_dir, exist_ok=True) + os.makedirs(cropped_dir, exist_ok=True) png_files = sorted( [f for f in os.listdir(base_dir) if f.lower().endswith(".png")], @@ -438,10 +402,12 @@ rows = [] normalized_rows = [] + # YOLOX初期化 yolox_inferencer = None if YOLOX_ENABLED: yolox_inferencer = init_yolox() + # 時間計測用 dict (描画の時間は含まない) timings = { "rtmpose_single": [], "yolox_single": [], @@ -457,7 +423,7 @@ "pipeline_earsnet_cropped": [], } - # モデルロード + # 各モデルの事前ロード if LIGHTGBM_ENABLED: lgb_model_x = load_model("./models/LightGBM/stethoscope_calc_x_best_model.pkl") lgb_model_y = load_model("./models/LightGBM/stethoscope_calc_y_best_model.pkl") @@ -515,7 +481,9 @@ "stethoscope_y", ] - # メインループ + # ----------------------------- + # メインループ:推論 & 座標計算のみ + # ----------------------------- for image_file_name in png_files: image_path = os.path.join(base_dir, image_file_name) frame = cv2.imread(image_path) @@ -523,6 +491,7 @@ print(f"Failed to load image: {image_path}") continue + # (A) RTMPose or PoseNet rtmpose_time = 0.0 if RTMPOSE_ENABLED: start_time_rtmpose = time.time() @@ -533,6 +502,7 @@ bboxes = np.concatenate( (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1 ) + # 人物のみ bboxes = bboxes[ np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3) ] @@ -551,6 +521,7 @@ processed_frames += 1 continue + # PoseOverlay(可視化) → 時間計測には含めない if visualizer is not None: visualizer.add_datasample( "result", @@ -597,7 +568,7 @@ left_hip = (0, 0) right_hip = (0, 0) - # YOLOX + # (B) YOLOX yolox_time = 0.0 stethoscope_x, stethoscope_y = 0, 0 if YOLOX_ENABLED: @@ -618,7 +589,6 @@ os.path.join(stethoscope_overlay_dir, image_file_name), stethoscope_overlay_img, ) - elif POSENET_ENABLED: pose_keypoints_pose_net = [[0, 0]] * 13 pose_keypoints_pose_net[5] = (left_shoulder[0], left_shoulder[1]) @@ -657,6 +627,7 @@ "stethoscope_y": stethoscope_y, } + # (C) EARSNet (単体) if EARSNET_ENABLED: start_time_earsnet = time.time() earsnet_x, earsnet_y = earsnet_predictor.predict(image_path) @@ -668,6 +639,7 @@ row["earsnet_stethoscope_x"] = earsnet_x row["earsnet_stethoscope_y"] = earsnet_y + # (D) EARSNet (クロップ) if EARSNET_CROP_ENABLED: cropped_img, (crop_xmin, crop_ymin) = crop_body_from_keypoints( frame, left_shoulder, right_shoulder, left_hip, right_hip @@ -689,7 +661,7 @@ row["earsnet_crop_stethoscope_x"] = earsnet_cropped_x row["earsnet_crop_stethoscope_y"] = earsnet_cropped_y - # 正規化 + # (E) 正規化 source_points = np.array( [ [float(row["left_shoulder_x"]), float(row["left_shoulder_y"])], @@ -719,6 +691,7 @@ "stethoscope_y": normalized_points[4, 1], } + # --- EARSNet の Normalized if EARSNET_ENABLED: stetho_point_earsnet = np.array( [ @@ -745,78 +718,72 @@ normalized_row["earsnet_crop_stethoscope_x"] = norm_earsnet_crop[4, 0] normalized_row["earsnet_crop_stethoscope_y"] = norm_earsnet_crop[4, 1] + # --- Conv (Affine) + if RTMPOSE_ENABLED and YOLOX_ENABLED and CONV_ENABLED: + # すでに start_conv, end_conv は timings計測用のみ + # → ここでピクセル座標を row に書き込む + source_pts = np.array( + [ + [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])] + for pos in [ + "left_shoulder", + "right_shoulder", + "left_hip", + "right_hip", + ] + ], + dtype=np.float32, + ) + stetho_pt = np.array( + [float(row["stethoscope_x"]), float(row["stethoscope_y"])] + ) + calc_x, calc_y = calc_position.calc_affine(source_pts, *stetho_pt) + + # conv_stethoscope_x/y を row に書き込み + row["conv_stethoscope_x"] = calc_x + row["conv_stethoscope_y"] = calc_y + + # --- XGBoost + if RTMPOSE_ENABLED and YOLOX_ENABLED and XGBOOST_ENABLED: + if NORMALIZE_ENABLED: + input_data_xg = pd.DataFrame([normalized_row]) + else: + input_data_xg = pd.DataFrame([row]) + + X_scaled_x = xg_scaler_x.transform(input_data_xg[input_columns]) + x_pred = xg_model_x.predict(X_scaled_x)[0] + X_scaled_y = xg_scaler_y.transform(input_data_xg[input_columns]) + y_pred = xg_model_y.predict(X_scaled_y)[0] + + row["Xgboost_stethoscope_x"] = x_pred + row["Xgboost_stethoscope_y"] = y_pred + + # --- LightGBM + if RTMPOSE_ENABLED and YOLOX_ENABLED and LIGHTGBM_ENABLED: + if NORMALIZE_ENABLED: + input_data_lgb = pd.DataFrame([normalized_row]) + else: + input_data_lgb = pd.DataFrame([row]) + + X_scaled_x = lgb_scaler_x.transform(input_data_lgb[input_columns]) + lgb_x_pred = lgb_model_x.predict(X_scaled_x)[0] + X_scaled_y = lgb_scaler_y.transform(input_data_lgb[input_columns]) + lgb_y_pred = lgb_model_y.predict(X_scaled_y)[0] + + row["lightGBM_stethoscope_x"] = lgb_x_pred + row["lightGBM_stethoscope_y"] = lgb_y_pred + + # row, normalized_row を最終リストに追加 rows.append(row) normalized_rows.append(normalized_row) - # パイプライン - if RTMPOSE_ENABLED and YOLOX_ENABLED: - if CONV_ENABLED: - start_conv = time.time() - source_pts = np.array( - [ - [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])] - for pos in [ - "left_shoulder", - "right_shoulder", - "left_hip", - "right_hip", - ] - ], - dtype=np.float32, - ) - stetho_pt = np.array( - [float(row["stethoscope_x"]), float(row["stethoscope_y"])] - ) - _ = calc_position.calc_affine(source_pts, *stetho_pt) - end_conv = time.time() - conv_time = end_conv - start_conv - timings["conv_single"].append(conv_time) - timings["pipeline_rtmpose_yolox_conv"].append( - detection_time_rtmpose_yolox + conv_time - ) - - if XGBOOST_ENABLED: - xg_start = time.time() - if NORMALIZE_ENABLED: - input_data_xg = pd.DataFrame([normalized_rows[-1]]) - else: - input_data_xg = pd.DataFrame([rows[-1]]) - X_scaled_x = xg_scaler_x.transform(input_data_xg[input_columns]) - _ = xg_model_x.predict(X_scaled_x)[0] - X_scaled_y = xg_scaler_y.transform(input_data_xg[input_columns]) - _ = xg_model_y.predict(X_scaled_y)[0] - xg_end = time.time() - xg_time = xg_end - xg_start - timings["xgboost_single"].append(xg_time) - timings["pipeline_rtmpose_yolox_xgboost"].append( - detection_time_rtmpose_yolox + xg_time - ) - - if LIGHTGBM_ENABLED: - lgb_start = time.time() - if NORMALIZE_ENABLED: - input_data_lgb = pd.DataFrame([normalized_rows[-1]]) - else: - input_data_lgb = pd.DataFrame([rows[-1]]) - X_scaled_x = lgb_scaler_x.transform(input_data_lgb[input_columns]) - _ = lgb_model_x.predict(X_scaled_x)[0] - X_scaled_y = lgb_scaler_y.transform(input_data_lgb[input_columns]) - _ = lgb_model_y.predict(X_scaled_y)[0] - lgb_end = time.time() - lgb_time = lgb_end - lgb_start - timings["lightgbm_single"].append(lgb_time) - timings["pipeline_rtmpose_yolox_lightgbm"].append( - detection_time_rtmpose_yolox + lgb_time - ) - processed_frames += 1 - # CSV 書き込み + # (G) CSV書き込み if rows: fieldnames = list(rows[0].keys()) csvfile_path = os.path.join(results_dir, "results.csv") normfile_path = os.path.join(results_dir, "results-convert.csv") - os.makedirs(results_dir, exist_ok=True) with ( open(csvfile_path, "w", newline="") as csvfile, @@ -836,11 +803,13 @@ print(f"Processed and saved results to: {csvfile_path}") print(f"Processed and saved normalized results to: {normfile_path}") + # ★★★推論結果が出そろった後で描画 (描画時間はFPSに含めない)★★★ generate_visualizations(csvfile_path, base_dir, results_dir) + else: print("No data to write to CSV.") - # FPS計測 + # (H) FPS計算結果をCSV保存 fps_data = [] for method_name, time_list in timings.items(): if not time_list: @@ -884,25 +853,27 @@ ############################################################################### -# 可視化・動画化(Body画像への描画も Pillow で行う) +# 可視化・動画化 (描画時間はFPSに含めない) ############################################################################### def generate_visualizations(csv_path, original_images_dir, results_dir): """ - CSVに書き込んだ推定結果を用い、BodyF.png(or BodyB.png)への描画や動画化を行う。 - すべての描画をPillowで実装。最終的にcv2.imwrite()で保存するのでRGB→BGR変換が必要。 + CSVを読み込み、BodyF.png上や元フレーム上に各手法の結果を描画 → 動画化。 + 描画時間はFPSに含めず、ここでまとめて行う。 + + 手法が True になっているものについては、 + '_with_trajectory' と '_without_trajectory' の両動画を生成。 """ df = pd.read_csv(csv_path) - body_image_path = "./images/body/BodyF.png" if not os.path.exists(body_image_path): print(f"Warning: {body_image_path} not found.") return - # Pillow (RGB) + # Pillow(RGB)で開く body_img_pil = Image.open(body_image_path).convert("RGB") - # np.array()すると「RGB順」のまま入る - body_np_rgb = np.array(body_img_pil) + body_np_rgb = np.array(body_img_pil) # RGB順 + # 結果格納用ディレクトリの準備 dirs = {"marked": "marked_images"} if CONV_ENABLED: dirs["conv"] = "conv" @@ -919,6 +890,7 @@ if EARSNET_CROP_ENABLED: dirs["earsnet_crop"] = "earsnet_crop" + # 常に combined も作成 dirs["combined"] = "combined" os.makedirs(os.path.join(results_dir, "marked_images"), exist_ok=True) @@ -932,8 +904,10 @@ exist_ok=True, ) + # ★ポイントを各メソッド別に保持(1動画につき1回リセット) points = {key: [] for key in dirs.keys() if key not in ["marked", "combined"]} + # 色設定 colors = { "conv": CONV_COLOR, "Xgboost": XGBOOST_COLOR, @@ -944,6 +918,7 @@ "earsnet_crop": (255, 51, 255), } + # (1) 各フレームにマーキング + BodyF.pngへ軌跡描画 for _, row in df.iterrows(): original_image_path = os.path.join(original_images_dir, row["image_file_name"]) if not os.path.exists(original_image_path): @@ -952,10 +927,9 @@ if original_image is None: continue - # --- 1) マーキング --- + # ---- (1-A) 各フレームへのマーキング (肩・腰・聴診器) pil_marked = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) draw_marked = ImageDraw.Draw(pil_marked) - for point in [ "left_shoulder", "right_shoulder", @@ -965,22 +939,21 @@ ]: col_x = f"{point}_x" col_y = f"{point}_y" - if col_x in row and col_y in row: - val_x = row[col_x] - val_y = row[col_y] - if pd.isna(val_x) or pd.isna(val_y): - continue - x, y = int(val_x), int(val_y) + if ( + col_x in row + and col_y in row + and not pd.isna(row[col_x]) + and not pd.isna(row[col_y]) + ): + x, y = int(row[col_x]), int(row[col_y]) pillow_draw_circle(draw_marked, (x, y), 5, fill=(255, 255, 0)) - # Pillow(RGB) → BGR marked_rgb = np.array(pil_marked) marked_bgr = cv2.cvtColor(marked_rgb, cv2.COLOR_RGB2BGR) marked_dir = os.path.join(results_dir, "marked_images") cv2.imwrite(os.path.join(marked_dir, row["image_file_name"]), marked_bgr) - # --- 2) BodyF.pngに軌跡描画 --- - # body_np_rgbをコピー (RGB配列) + # ---- (1-B) BodyF.png へ各メソッドの軌跡を描画 combined_image_with_traj_rgb = body_np_rgb.copy() combined_image_without_traj_rgb = body_np_rgb.copy() @@ -989,22 +962,21 @@ draw_with_traj = ImageDraw.Draw(pil_with_traj) draw_without_traj = ImageDraw.Draw(pil_without_traj) + # 各メソッドの推定結果(earsnet, conv, xgboost, lightGBM等)を取得 for key in points.keys(): col_x = f"{key}_stethoscope_x" col_y = f"{key}_stethoscope_y" if col_x not in row or col_y not in row: continue - val_x = row[col_x] - val_y = row[col_y] - if pd.isna(val_x) or pd.isna(val_y): + if pd.isna(row[col_x]) or pd.isna(row[col_y]): continue - x, y = int(val_x), int(val_y) + x, y = int(row[col_x]), int(row[col_y]) points[key].append((x, y)) color = colors[key] if key in colors else (0, 0, 255) - # 個別 with trajectory + # (A) 個別 with trajectory indiv_with_traj_rgb = body_np_rgb.copy() pil_indiv_with = Image.fromarray(indiv_with_traj_rgb) draw_indiv_with = ImageDraw.Draw(pil_indiv_with) @@ -1013,68 +985,50 @@ pillow_draw_polyline(draw_indiv_with, points[key], color=color, width=2) pillow_draw_circle(draw_indiv_with, (x, y), 10, fill=color) - indiv_with_traj_rgb2 = np.array(pil_indiv_with) - # RGB -> BGR - indiv_with_traj_bgr = cv2.cvtColor(indiv_with_traj_rgb2, cv2.COLOR_RGB2BGR) + indiv_with_traj_np = np.array(pil_indiv_with) + indiv_with_traj_bgr = cv2.cvtColor(indiv_with_traj_np, cv2.COLOR_RGB2BGR) out_path_with = os.path.join( results_dir, f"{dirs[key]}_with_trajectory", row["image_file_name"] ) cv2.imwrite(out_path_with, indiv_with_traj_bgr) - # 個別 without trajectory + # (B) 個別 without trajectory indiv_without_traj_rgb = body_np_rgb.copy() pil_indiv_without = Image.fromarray(indiv_without_traj_rgb) draw_indiv_without = ImageDraw.Draw(pil_indiv_without) pillow_draw_circle(draw_indiv_without, (x, y), 10, fill=color) - indiv_without_traj_rgb2 = np.array(pil_indiv_without) + indiv_without_traj_np = np.array(pil_indiv_without) indiv_without_traj_bgr = cv2.cvtColor( - indiv_without_traj_rgb2, cv2.COLOR_RGB2BGR + indiv_without_traj_np, cv2.COLOR_RGB2BGR ) out_path_without = os.path.join( results_dir, f"{dirs[key]}_without_trajectory", row["image_file_name"] ) cv2.imwrite(out_path_without, indiv_without_traj_bgr) - # combined with trajectory + # (C) combined with trajectory if len(points[key]) > 1: pillow_draw_polyline(draw_with_traj, points[key], color=color, width=2) pillow_draw_circle(draw_with_traj, (x, y), 10, fill=color) - # combined without trajectory + # (D) combined without trajectory pillow_draw_circle(draw_without_traj, (x, y), 10, fill=color) # 結果 (pil_with_traj / pil_without_traj) を BGR に変換して保存 - combined_with_traj_rgb2 = np.array(pil_with_traj) # RGB - combined_without_traj_rgb2 = np.array(pil_without_traj) # RGB - combined_with_traj_bgr = cv2.cvtColor( - combined_with_traj_rgb2, cv2.COLOR_RGB2BGR - ) - combined_without_traj_bgr = cv2.cvtColor( - combined_without_traj_rgb2, cv2.COLOR_RGB2BGR - ) + cwt_np = np.array(pil_with_traj) + cwt_bgr = cv2.cvtColor(cwt_np, cv2.COLOR_RGB2BGR) + cwd = os.path.join(results_dir, "combined_with_trajectory") + os.makedirs(cwd, exist_ok=True) + cv2.imwrite(os.path.join(cwd, row["image_file_name"]), cwt_bgr) - os.makedirs( - os.path.join(results_dir, "combined_with_trajectory"), exist_ok=True - ) - os.makedirs( - os.path.join(results_dir, "combined_without_trajectory"), exist_ok=True - ) + cwo_np = np.array(pil_without_traj) + cwo_bgr = cv2.cvtColor(cwo_np, cv2.COLOR_RGB2BGR) + cwod = os.path.join(results_dir, "combined_without_trajectory") + os.makedirs(cwod, exist_ok=True) + cv2.imwrite(os.path.join(cwod, row["image_file_name"]), cwo_bgr) - cv2.imwrite( - os.path.join( - results_dir, "combined_with_trajectory", row["image_file_name"] - ), - combined_with_traj_bgr, - ) - cv2.imwrite( - os.path.join( - results_dir, "combined_without_trajectory", row["image_file_name"] - ), - combined_without_traj_bgr, - ) - - # 動画化 + # (2) 動画化 create_video_from_images( os.path.join(results_dir, "marked_images"), os.path.join(results_dir, "marked_video.mp4"), @@ -1124,6 +1078,9 @@ print(f"Created video: {output_path}") +############################################################################### +# メイン +############################################################################### def main(): parser = argparse.ArgumentParser(description="Process video and generate results.") parser.add_argument( @@ -1137,6 +1094,7 @@ help="Directory to save output images and results", ) + # RTMpose用 det_config = "modules/rtmpose/mmdetection_cfg/rtmdet_m_640-8xb32_coco-person.py" det_checkpoint = ( "models/rtmpose/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth" @@ -1150,12 +1108,15 @@ args = parser.parse_args() os.makedirs(args.output_dir, exist_ok=True) + # (1) FPSモニタースレッド (推論のみ計測) fps_thread = Thread(target=fps_monitor, args=(1.0,), daemon=True) fps_thread.start() + # (2) 動画→フレーム frames_dir = os.path.join(args.output_dir, "frames") video_to_frames(args.video_path, frames_dir) + # (3) RTMposeの初期化 (必要であれば) if RTMPOSE_ENABLED: detector = init_detector(det_config, det_checkpoint, device=DEVICE) detector.cfg = adapt_mmdet_pipeline(detector.cfg) @@ -1166,11 +1127,12 @@ visualizer.set_dataset_meta( pose_estimator.dataset_meta, skeleton_style="mmpose" ) - process_images(args, detector, pose_estimator, visualizer) else: + # PoseNet or no keypoints usage process_images(args, None, None, None) + # (4) スレッド終了 global stop_fps_thread stop_fps_thread = True fps_thread.join()