###############################################################################
# Shared state and worker for real-time FPS monitoring
###############################################################################
# Number of frames fully processed so far; incremented by the main thread.
processed_frames = 0
# Set to True by the main thread to ask the monitor thread to exit.
stop_fps_thread = False
# (wall-clock timestamp, fps) samples, kept so they can be exported to CSV later.
fps_history = []

# Longest single sleep while waiting out a sampling interval. Keeping it short
# lets the monitor notice ``stop_fps_thread`` quickly instead of blocking the
# caller's ``join()`` for a whole interval.
_STOP_POLL_SECONDS = 0.1


def _sleep_until(deadline):
    """Sleep until ``time.perf_counter()`` reaches *deadline*.

    Returns:
        True if ``stop_fps_thread`` was set before the deadline was reached
        (the caller should exit), False if the deadline elapsed normally.
    """
    while not stop_fps_thread:
        remaining = deadline - time.perf_counter()
        if remaining <= 0:
            return False
        time.sleep(min(remaining, _STOP_POLL_SECONDS))
    return True


def fps_monitor(interval=1.0):
    """Periodically report the real-time FPS derived from ``processed_frames``.

    Intended to run in a separate thread. Roughly every *interval* seconds it
    compares ``processed_frames`` with the value seen at the previous sample,
    prints the resulting frames-per-second figure and appends
    ``(wall_clock_timestamp, fps)`` to ``fps_history``. The loop ends once
    ``stop_fps_thread`` becomes true; a stop request is honoured within about
    ``_STOP_POLL_SECONDS`` and no partial sample is emitted for the
    interrupted interval.

    Args:
        interval: Sampling period in seconds; must be positive.

    Raises:
        ValueError: If *interval* is zero or negative.
    """
    if interval <= 0:
        raise ValueError(f"interval must be positive, got {interval!r}")

    # The globals are only read (or mutated in place), never rebound here, so
    # no ``global`` statement is needed.
    last_count = processed_frames
    # perf_counter is monotonic, so a system clock adjustment cannot produce a
    # negative or inflated time delta.
    last_tick = time.perf_counter()

    while not _sleep_until(last_tick + interval):
        tick = time.perf_counter()

        current_count = processed_frames
        frames_delta = current_count - last_count
        time_delta = tick - last_tick
        current_fps = frames_delta / time_delta if time_delta > 0 else 0.0

        print(
            f"[FPS Monitor] Real-time FPS: {current_fps:.2f} (frames: +{frames_delta})"
        )

        # The history keeps a wall-clock timestamp so an exported CSV is
        # human-readable; only the delta computation uses the monotonic clock.
        fps_history.append((time.time(), current_fps))

        last_count = current_count
        last_tick = tick


###############################################################################
# Existing processing (pose estimation, stethoscope detection, FPS timing, ...)
###############################################################################
def load_model(model_path, model_type="lgb"):
    """Load and return a pickled model object from *model_path*.

    Args:
        model_path: Path to the pickle file.
        model_type: Accepted for call compatibility; not used by this function.

    Returns:
        The object stored in the pickle file.
    """
    # NOTE(review): pickle.load executes arbitrary code embedded in the file.
    # Only load model files from a trusted source.
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)
(vertices[j][1] > y): + slope = (vertices[j][0] - vertices[i][0]) / ( + vertices[j][1] - vertices[i][1] + ) + intersect_x = slope * (y - vertices[i][1]) + vertices[i][0] + if x < intersect_x: + inside = not inside j = i return inside def yolox_detector_inference(frame, yolox_inferencer, pose_keypoints, score_thr=0.3): + """YOLOXで聴診器を検出し、ポリゴン内部にある聴診器の中心座標を返す。""" frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) result = yolox_inferencer(inputs=frame_rgb, return_vis=True) predictions = result["predictions"][0] @@ -130,12 +179,14 @@ stethoscope_y = None max_score = -1 + # keypoints 配列から部位を取得 nose = pose_keypoints[0] left_shoulder = pose_keypoints[5] right_shoulder = pose_keypoints[6] left_hip = pose_keypoints[11] right_hip = pose_keypoints[12] + # 肩と腰を大きめに外に広げる expanded_left_shoulder, expanded_right_shoulder = expand_points( left_shoulder, right_shoulder ) @@ -154,7 +205,7 @@ for i, (label, score) in enumerate( zip(predictions["labels"], predictions["scores"]) ): - if score >= score_thr and label == 0: # label 0 is stethoscope + if score >= score_thr and label == 0: # label=0 を聴診器として判定 bbox = predictions["bboxes"][i] center_x = (bbox[0] + bbox[2]) / 2 center_y = (bbox[1] + bbox[3]) / 2 @@ -186,6 +237,7 @@ def normalize_quadrilateral_with_point(points, extra_point): + """4点(肩・肩・腰・腰)と任意の1点(聴診器)を正規化して返す。""" all_points = np.vstack([points.reshape(-1, 2), extra_point]) center = np.mean(points.reshape(-1, 2), axis=0) centered_points = all_points - center @@ -257,7 +309,13 @@ def process_images(args, detector, pose_estimator, visualizer): + """ + メインスレッドでフレームごとの推論を行う。 + 別スレッドでリアルタイムFPSを計測しているため、 + フレーム処理終了後に processed_frames をインクリメントする。 + """ print("Starting process_images function...") + global processed_frames # 別スレッドと共有 ears_ai = EarsAI() calc_position = CalcStethoscopePosition() base_dir = os.path.join(args.output_dir, "frames") @@ -279,25 +337,116 @@ rows = [] normalized_rows = [] - yolox_inferencer = init_yolox() + + # ---------------- + # YOLOX 初期化 + # 
---------------- + yolox_inferencer = None + if YOLOX_ENABLED: + yolox_inferencer = init_yolox() + + # ---------------- + # 時間計測用 dict + # ---------------- + timings = { + # 単体推論 + "rtmpose_single": [], + "yolox_single": [], + "conv_single": [], + "lightgbm_single": [], + "xgboost_single": [], + "earsnet_single": [], + # パイプライン推論 + "pipeline_rtmpose_yolox_conv": [], + "pipeline_rtmpose_yolox_lightgbm": [], + "pipeline_rtmpose_yolox_xgboost": [], + "pipeline_earsnet": [], + } + + # ---------------- + # 各モデルの事前ロード + # ---------------- + if LIGHTGBM_ENABLED: + lgb_model_x = load_model("./models/LightGBM/stethoscope_calc_x_best_model.pkl") + lgb_model_y = load_model("./models/LightGBM/stethoscope_calc_y_best_model.pkl") + lgb_scaler_x = load_scaler("./models/LightGBM/scaler-x.pkl") + lgb_scaler_y = load_scaler("./models/LightGBM/scaler-y.pkl") + if XGBOOST_ENABLED: + xg_model_x = load_model("./models/XGBoost/stethoscope_calc_x_best_model.pkl") + xg_model_y = load_model("./models/XGBoost/stethoscope_calc_y_best_model.pkl") + xg_scaler_x = load_scaler("./models/XGBoost/scaler-x.pkl") + xg_scaler_y = load_scaler("./models/XGBoost/scaler-y.pkl") + if CATBOOST_ENABLED: + catboost_model_x = load_model( + "./models/CatBoost/stethoscope_calc_x_best_model.pkl" + ) + catboost_model_y = load_model( + "./models/CatBoost/stethoscope_calc_y_best_model.pkl" + ) + if NGBOOST_ENABLED: + ngboost_model_x = load_model( + "./models/NGBoost/stethoscope_calc_x_best_model.pkl" + ) + ngboost_model_y = load_model( + "./models/NGBoost/stethoscope_calc_y_best_model.pkl" + ) + if EARSNET_ENABLED: + earsnet_predictor = load_earsnet_model( + model_path="models/EARSNet/best_model.pth", + model_type="resnet", + model_version="18", + ) + + input_columns = [ + "left_shoulder_x", + "left_shoulder_y", + "right_shoulder_x", + "right_shoulder_y", + "left_hip_x", + "left_hip_y", + "right_hip_x", + "right_hip_y", + "stethoscope_x", + "stethoscope_y", + ] + + # 
------------------------------------------------------------ + # メインループ(フレームごとに処理) + # ------------------------------------------------------------ for image_file_name in png_files: - print(f"Processing image: {image_file_name}") image_path = os.path.join(base_dir, image_file_name) frame = cv2.imread(image_path) if frame is None: print(f"Failed to load image: {image_path}") continue + # ============================================================ + # (1) PoseNet or RTMPOSE による姿勢推定(肩・腰座標取得) + # ============================================================ + left_shoulder = (0, 0) + right_shoulder = (0, 0) + left_hip = (0, 0) + right_hip = (0, 0) + pose_overlay_img = frame.copy() + if POSENET_ENABLED: - pose_overlay_img, *landmarks = ears_ai.pose_detect(frame, None) + # ▼ PoseNet + start_time_pose = time.time() + pose_overlay_img, *landmarks = EarsAI().pose_detect(frame, None) + end_time_pose = time.time() + timings["rtmpose_single"].append(end_time_pose - start_time_pose) + left_shoulder = landmarks[0] right_shoulder = landmarks[1] left_hip = landmarks[2] right_hip = landmarks[3] - if RTMPOSE_ENABLED: - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - det_result = inference_detector(detector, frame) + elif RTMPOSE_ENABLED: + # ▼ RTMPOSE + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + start_time_pose = time.time() + det_result = inference_detector(detector, frame_rgb) pred_instance = det_result.pred_instances.cpu().numpy() bboxes = np.concatenate( (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1 @@ -306,11 +455,17 @@ np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3) ] bboxes = bboxes[nms(bboxes, 0.3), :4] - pose_results = inference_topdown(pose_estimator, frame, bboxes) + pose_results = inference_topdown(pose_estimator, frame_rgb, bboxes) data_samples = merge_data_samples(pose_results) pose_keypoints = extract_keypoints_rtmpose(pose_results) + end_time_pose = time.time() + + timings["rtmpose_single"].append(end_time_pose - 
start_time_pose) + if pose_keypoints is None: print(f"Failed to extract keypoints for image: {image_path}") + # 処理ができなかった場合もフレーム処理は一応完了するのでincrement + processed_frames += 1 continue left_shoulder = pose_keypoints[5] @@ -318,10 +473,11 @@ left_hip = pose_keypoints[11] right_hip = pose_keypoints[12] + # 可視化 if visualizer is not None: visualizer.add_datasample( "result", - frame, + frame_rgb, data_sample=data_samples, draw_gt=False, draw_heatmap=False, @@ -332,41 +488,80 @@ wait_time=0, kpt_thr=0.3, ) - pose_overlay_img = visualizer.get_image() + pose_overlay_img = visualizer.get_image() # (RGB) + # ============================================================ + # (2) YOLOX or SSD で聴診器の推定(必要に応じて) + # ============================================================ + stethoscope_overlay_img = frame.copy() + stethoscope_x = 0 + stethoscope_y = 0 + + # SSD (MobileNetV1SSD) if MobileNetV1SSD_ENABLED: - stethoscope_overlay_img, stethoscope_x, stethoscope_y = ears_ai.ssd_detect( + start_time_ssd = time.time() + stethoscope_overlay_img, stethoscope_x, stethoscope_y = EarsAI().ssd_detect( frame, None ) + end_time_ssd = time.time() + # もし単体測定したい場合は "ssd_single" などに追加 - if YOLOX_ENABLED and pose_keypoints is not None: - stethoscope_overlay_img, stethoscope_x, stethoscope_y = ( - yolox_detector_inference( - frame, - yolox_inferencer, - pose_keypoints, + # YOLOX + if YOLOX_ENABLED: + if RTMPOSE_ENABLED and pose_keypoints is not None: + start_time_yolox = time.time() + (stethoscope_overlay_img, stethoscope_x, stethoscope_y) = ( + yolox_detector_inference(frame, yolox_inferencer, pose_keypoints) ) - ) + end_time_yolox = time.time() + timings["yolox_single"].append(end_time_yolox - start_time_yolox) - if RTMPOSE_ENABLED and YOLOX_ENABLED: - cv2.imwrite( - os.path.join(pose_overlay_dir, image_file_name), pose_overlay_img - ) + elif POSENET_ENABLED: + # PoseNetの場合 keypoints形式を整える + pose_keypoints_pose_net = [[0, 0]] * 13 + pose_keypoints_pose_net[5] = left_shoulder + 
pose_keypoints_pose_net[6] = right_shoulder + pose_keypoints_pose_net[11] = left_hip + pose_keypoints_pose_net[12] = right_hip + + start_time_yolox = time.time() + (stethoscope_overlay_img, stethoscope_x, stethoscope_y) = ( + yolox_detector_inference( + frame, yolox_inferencer, pose_keypoints_pose_net + ) + ) + end_time_yolox = time.time() + timings["yolox_single"].append(end_time_yolox - start_time_yolox) + + # --------------------------------------------------------- + # 可視化結果を保存 (pose, stethoscope) + # --------------------------------------------------------- + if (RTMPOSE_ENABLED or POSENET_ENABLED) and ( + YOLOX_ENABLED or MobileNetV1SSD_ENABLED + ): + # PoseはRGB->BGR に変換(RTMPOSE時) + if RTMPOSE_ENABLED: + cv2.imwrite( + os.path.join(pose_overlay_dir, image_file_name), + cv2.cvtColor(pose_overlay_img, cv2.COLOR_RGB2BGR), + ) + else: + # PoseNetなら BGR のまま + cv2.imwrite( + os.path.join(pose_overlay_dir, image_file_name), + pose_overlay_img, + ) + cv2.imwrite( os.path.join(stethoscope_overlay_dir, image_file_name), stethoscope_overlay_img, ) - else: - cv2.imwrite( - os.path.join(pose_overlay_dir, image_file_name), - cv2.cvtColor(pose_overlay_img, cv2.COLOR_RGB2BGR), - ) - cv2.imwrite( - os.path.join(stethoscope_overlay_dir, image_file_name), - cv2.cvtColor(stethoscope_overlay_img, cv2.COLOR_RGB2BGR), - ) + # ============================================================ + # (3) CSV用に肩・腰・聴診器座標をまとめる + # ============================================================ if POSENET_ENABLED: + # PoseNet が (y, x) row = { "image_file_name": image_file_name, "left_shoulder_x": left_shoulder[1], @@ -380,7 +575,8 @@ "stethoscope_x": stethoscope_x, "stethoscope_y": stethoscope_y, } - elif RTMPOSE_ENABLED: + else: + # RTMPOSE の場合 (x, y) row = { "image_file_name": image_file_name, "left_shoulder_x": left_shoulder[0], @@ -394,14 +590,10 @@ "stethoscope_x": stethoscope_x, "stethoscope_y": stethoscope_y, } - else: - print( - "No pose estimation method enabled. 
Please enable either PoseNet or RTMPose." - ) - continue rows.append(row) + # 正規化 source_points = np.array( [ [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])] @@ -415,7 +607,6 @@ normalized_points = normalize_quadrilateral_with_point( source_points.flatten(), stethoscope_point ) - normalized_row = { "image_file_name": image_file_name, "left_shoulder_x": normalized_points[0, 0], @@ -431,6 +622,188 @@ } normalized_rows.append(normalized_row) + # ============================================================ + # (4) 各パイプラインの FPS計測(例: RTMPOSE+YOLOX+conv, etc.) + # ============================================================ + # 以下は例として「改めて同じフレームをRTMPOSE+YOLOX+各種手法」で測定。 + # パイプラインごとにRTMPOSEとYOLOXを呼び直すため、処理時間は増加します。 + # もし重複呼び出しを避けたければ実装を見直してください。 + # -- (A) RTMPOSE + YOLOX + conv + if RTMPOSE_ENABLED and YOLOX_ENABLED and CONV_ENABLED: + start_pipeline = time.time() + + # 1) rtmpose (再度推定) + start_rtmpose = time.time() + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + det_result = inference_detector(detector, frame_rgb) + pred_instance = det_result.pred_instances.cpu().numpy() + bboxes = np.concatenate( + (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1 + ) + bboxes = bboxes[ + np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3) + ] + bboxes = bboxes[nms(bboxes, 0.3), :4] + pose_results = inference_topdown(pose_estimator, frame_rgb, bboxes) + keypoints_conv_pipeline = extract_keypoints_rtmpose(pose_results) + end_rtmpose = time.time() + timings["rtmpose_single"].append(end_rtmpose - start_rtmpose) + + # 2) YOLOX (再度推定) + s_x_tmp, s_y_tmp = 0, 0 + if keypoints_conv_pipeline is not None: + start_yolox = time.time() + _, s_x_tmp, s_y_tmp = yolox_detector_inference( + frame, yolox_inferencer, keypoints_conv_pipeline + ) + end_yolox = time.time() + timings["yolox_single"].append(end_yolox - start_yolox) + + # 3) conv + if keypoints_conv_pipeline is not None: + start_conv = time.time() + source_pts = np.array( + [ + 
keypoints_conv_pipeline[5], + keypoints_conv_pipeline[6], + keypoints_conv_pipeline[11], + keypoints_conv_pipeline[12], + ], + dtype=np.float32, + ) + if s_x_tmp != 0 or s_y_tmp != 0: + _ = calc_position.calc_affine(source_pts, s_x_tmp, s_y_tmp) + end_conv = time.time() + timings["conv_single"].append(end_conv - start_conv) + + end_pipeline = time.time() + timings["pipeline_rtmpose_yolox_conv"].append(end_pipeline - start_pipeline) + + # -- (B) RTMPOSE + YOLOX + LightGBM + if RTMPOSE_ENABLED and YOLOX_ENABLED and LIGHTGBM_ENABLED: + start_pipeline = time.time() + # 1) RTMPOSE + start_rtmpose = time.time() + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + det_result = inference_detector(detector, frame_rgb) + pred_instance = det_result.pred_instances.cpu().numpy() + bboxes = np.concatenate( + (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1 + ) + bboxes = bboxes[ + np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3) + ] + bboxes = bboxes[nms(bboxes, 0.3), :4] + pose_results = inference_topdown(pose_estimator, frame_rgb, bboxes) + keypoints_lgb_pipeline = extract_keypoints_rtmpose(pose_results) + end_rtmpose = time.time() + timings["rtmpose_single"].append(end_rtmpose - start_rtmpose) + + # 2) YOLOX + s_x_tmp, s_y_tmp = 0, 0 + if keypoints_lgb_pipeline is not None: + start_yolox = time.time() + _, s_x_tmp, s_y_tmp = yolox_detector_inference( + frame, yolox_inferencer, keypoints_lgb_pipeline + ) + end_yolox = time.time() + timings["yolox_single"].append(end_yolox - start_yolox) + + # 3) LightGBM + if s_x_tmp != 0 or s_y_tmp != 0: + input_data = ( + pd.DataFrame([row]) + if not NORMALIZE_ENABLED + else pd.DataFrame([normalized_row]) + ) + start_lgb = time.time() + # x 座標予測 + X_scaled_x = lgb_scaler_x.transform(input_data[input_columns]) + _ = lgb_model_x.predict(X_scaled_x) + # y 座標予測 + X_scaled_y = lgb_scaler_y.transform(input_data[input_columns]) + _ = lgb_model_y.predict(X_scaled_y) + end_lgb = time.time() + 
timings["lightgbm_single"].append(end_lgb - start_lgb) + + end_pipeline = time.time() + timings["pipeline_rtmpose_yolox_lightgbm"].append( + end_pipeline - start_pipeline + ) + + # -- (C) RTMPOSE + YOLOX + XGBoost + if RTMPOSE_ENABLED and YOLOX_ENABLED and XGBOOST_ENABLED: + start_pipeline = time.time() + # 1) RTMPOSE + start_rtmpose = time.time() + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + det_result = inference_detector(detector, frame_rgb) + pred_instance = det_result.pred_instances.cpu().numpy() + bboxes = np.concatenate( + (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1 + ) + bboxes = bboxes[ + np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3) + ] + bboxes = bboxes[nms(bboxes, 0.3), :4] + pose_results = inference_topdown(pose_estimator, frame_rgb, bboxes) + keypoints_xgb_pipeline = extract_keypoints_rtmpose(pose_results) + end_rtmpose = time.time() + timings["rtmpose_single"].append(end_rtmpose - start_rtmpose) + + # 2) YOLOX + s_x_tmp, s_y_tmp = 0, 0 + if keypoints_xgb_pipeline is not None: + start_yolox = time.time() + _, s_x_tmp, s_y_tmp = yolox_detector_inference( + frame, yolox_inferencer, keypoints_xgb_pipeline + ) + end_yolox = time.time() + timings["yolox_single"].append(end_yolox - start_yolox) + + # 3) XGBoost + if s_x_tmp != 0 or s_y_tmp != 0: + input_data = ( + pd.DataFrame([row]) + if not NORMALIZE_ENABLED + else pd.DataFrame([normalized_row]) + ) + start_xgb = time.time() + # x座標 + X_scaled_x = xg_scaler_x.transform(input_data[input_columns]) + _ = xg_model_x.predict(X_scaled_x) + # y座標 + X_scaled_y = xg_scaler_y.transform(input_data[input_columns]) + _ = xg_model_y.predict(X_scaled_y) + end_xgb = time.time() + timings["xgboost_single"].append(end_xgb - start_xgb) + + end_pipeline = time.time() + timings["pipeline_rtmpose_yolox_xgboost"].append( + end_pipeline - start_pipeline + ) + + # -- (D) EARSNET パイプライン(EARSNET 単体) + if EARSNET_ENABLED: + start_pipeline_earsnet = time.time() + # EARSNET の単体推論 + 
start_earsnet = time.time() + _ = predict_earsnet(earsnet_predictor, image_path) + end_earsnet = time.time() + timings["earsnet_single"].append(end_earsnet - start_earsnet) + + end_pipeline_earsnet = time.time() + timings["pipeline_earsnet"].append( + end_pipeline_earsnet - start_pipeline_earsnet + ) + + # フレームごとの処理が完了したらカウンタをインクリメント + processed_frames += 1 + + # ======================================================================== + # (5) 各フレームの位置推定(Conv, LightGBM, XGBoost, CatBoost, NGBoost, EARSNET) + # → CSV 書き込み + # ======================================================================== if rows: print(f"Writing {len(rows)} rows to CSV...") fieldnames = list(rows[0].keys()) @@ -447,72 +820,20 @@ if EARSNET_ENABLED: fieldnames.extend(["earsnet_stethoscope_x", "earsnet_stethoscope_y"]) - # ★ 変更箇所: scalerをX座標用とY座標用で読み込む - if LIGHTGBM_ENABLED: - lgb_model_x = load_model( - "./models/LightGBM/stethoscope_calc_x_best_model.pkl" - ) - lgb_model_y = load_model( - "./models/LightGBM/stethoscope_calc_y_best_model.pkl" - ) - lgb_scaler_x = load_scaler( - "./models/LightGBM/scaler-x.pkl" - ) # X座標用scaler - lgb_scaler_y = load_scaler( - "./models/LightGBM/scaler-y.pkl" - ) # Y座標用scaler - if XGBOOST_ENABLED: - xg_model_x = load_model( - "./models/XGBoost/stethoscope_calc_x_best_model.pkl" - ) - xg_model_y = load_model( - "./models/XGBoost/stethoscope_calc_y_best_model.pkl" - ) - xg_scaler_x = load_scaler("./models/XGBoost/scaler-x.pkl") # X座標用scaler - xg_scaler_y = load_scaler("./models/XGBoost/scaler-y.pkl") # Y座標用scaler - if CATBOOST_ENABLED: - catboost_model_x = load_model( - "./models/CatBoost/stethoscope_calc_x_best_model.pkl" - ) - catboost_model_y = load_model( - "./models/CatBoost/stethoscope_calc_y_best_model.pkl" - ) - if NGBOOST_ENABLED: - ngboost_model_x = load_model( - "./models/NGBoost/stethoscope_calc_x_best_model.pkl" - ) - ngboost_model_y = load_model( - "./models/NGBoost/stethoscope_calc_y_best_model.pkl" - ) - if EARSNET_ENABLED: - earsnet_predictor = 
load_earsnet_model( - model_path="models/EARSNet/best_model-50-F2.pth", - model_type="resnet", - model_version="50", - ) + os.makedirs(results_dir, exist_ok=True) - input_columns = [ - "left_shoulder_x", - "left_shoulder_y", - "right_shoulder_x", - "right_shoulder_y", - "left_hip_x", - "left_hip_y", - "right_hip_x", - "right_hip_y", - "stethoscope_x", - "stethoscope_y", - ] - + csvfile_path = os.path.join(results_dir, "results.csv") + normfile_path = os.path.join(results_dir, "results-convert.csv") with ( - open(csv_path, "w", newline="") as csvfile, - open(normalized_csv_path, "w", newline="") as norm_csvfile, + open(csvfile_path, "w", newline="") as csvfile, + open(normfile_path, "w", newline="") as norm_csvfile, ): writer = csv.DictWriter(csvfile, fieldnames=fieldnames) norm_writer = csv.DictWriter(norm_csvfile, fieldnames=fieldnames) writer.writeheader() norm_writer.writeheader() + # 前回値を保持する辞書 prev_values = {} if CONV_ENABLED: prev_values["conv"] = (180, 180) @@ -527,12 +848,14 @@ if EARSNET_ENABLED: prev_values["earsnet"] = (180, 180) - for i, (row, norm_row) in enumerate(zip(rows, normalized_rows)): - if NORMALIZE_ENABLED: - input_data = pd.DataFrame([norm_row]) - else: - input_data = pd.DataFrame([row]) + for row, norm_row in zip(rows, normalized_rows): + input_data = ( + pd.DataFrame([norm_row]) + if NORMALIZE_ENABLED + else pd.DataFrame([row]) + ) + # 聴診器未検出の場合 if row["stethoscope_x"] == 0 and row["stethoscope_y"] == 0: for key in prev_values: row[f"{key}_stethoscope_x"], row[f"{key}_stethoscope_y"] = ( @@ -545,7 +868,8 @@ else: # conv if CONV_ENABLED: - source_points = np.array( + start_time_conv = time.time() + source_pts = np.array( [ [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])] for pos in [ @@ -557,11 +881,11 @@ ], dtype=np.float32, ) - stethoscope_point = np.array( + stetho_pt = np.array( [float(row["stethoscope_x"]), float(row["stethoscope_y"])] ) conv_stethoscope = calc_position.calc_affine( - source_points, *stethoscope_point + source_pts, 
*stetho_pt ) row["conv_stethoscope_x"], row["conv_stethoscope_y"] = ( conv_stethoscope @@ -570,13 +894,15 @@ norm_row["conv_stethoscope_x"], norm_row["conv_stethoscope_y"], ) = conv_stethoscope + end_time_conv = time.time() + timings["conv_single"].append(end_time_conv - start_time_conv) + prev_values["conv"] = conv_stethoscope - # LightGBM prediction + # LightGBM if LIGHTGBM_ENABLED: - # ★ X座標用scalerで標準化してXモデルで予測 + start_time_lgb = time.time() X_scaled_x = lgb_scaler_x.transform(input_data[input_columns]) lgb_x_pred = int(lgb_model_x.predict(X_scaled_x)[0]) - # ★ Y座標用scalerで標準化してYモデルで予測 X_scaled_y = lgb_scaler_y.transform(input_data[input_columns]) lgb_y_pred = int(lgb_model_y.predict(X_scaled_y)[0]) row["lightGBM_stethoscope_x"], row["lightGBM_stethoscope_y"] = ( @@ -587,10 +913,13 @@ norm_row["lightGBM_stethoscope_x"], norm_row["lightGBM_stethoscope_y"], ) = (lgb_x_pred, lgb_y_pred) + end_time_lgb = time.time() + timings["lightgbm_single"].append(end_time_lgb - start_time_lgb) + prev_values["lightGBM"] = (lgb_x_pred, lgb_y_pred) - # XGBoost prediction + # XGBoost if XGBOOST_ENABLED: - # ★ XGBoost用も同様に分けて適用 + start_time_xgb = time.time() X_scaled_x = xg_scaler_x.transform(input_data[input_columns]) xg_x_pred = int(xg_model_x.predict(X_scaled_x)[0]) X_scaled_y = xg_scaler_y.transform(input_data[input_columns]) @@ -603,10 +932,13 @@ norm_row["Xgboost_stethoscope_x"], norm_row["Xgboost_stethoscope_y"], ) = (xg_x_pred, xg_y_pred) + end_time_xgb = time.time() + timings["xgboost_single"].append(end_time_xgb - start_time_xgb) + prev_values["Xgboost"] = (xg_x_pred, xg_y_pred) - # CatBoost prediction + # CatBoost if CATBOOST_ENABLED: - # CatBoostはscaler未使用の例として据え置き(必要なら同様にscaler分けが可能) + start_time_cat = time.time() catboost_x = int( catboost_model_x.predict(input_data[input_columns])[0] ) @@ -621,9 +953,13 @@ norm_row["catboost_stethoscope_x"], norm_row["catboost_stethoscope_y"], ) = (catboost_x, catboost_y) + end_time_cat = time.time() + # 
timings["catboost_single"].append( ... ) # 必要なら追加 + prev_values["catboost"] = (catboost_x, catboost_y) - # NGBoost prediction + # NGBoost if NGBOOST_ENABLED: + start_time_ngb = time.time() ngboost_x = int( ngboost_model_x.predict(input_data[input_columns])[0] ) @@ -638,10 +974,13 @@ norm_row["ngboost_stethoscope_x"], norm_row["ngboost_stethoscope_y"], ) = (ngboost_x, ngboost_y) + end_time_ngb = time.time() + # timings["ngboost_single"].append( ... ) # 必要なら追加 + prev_values["ngboost"] = (ngboost_x, ngboost_y) - # EARSNET prediction + # EARSNET (再度実行する場合) if EARSNET_ENABLED: - image_path = os.path.join(base_dir, row["image_file_name"]) + start_time_enet = time.time() earsnet_coords = predict_earsnet(earsnet_predictor, image_path) row["earsnet_stethoscope_x"], row["earsnet_stethoscope_y"] = ( earsnet_coords @@ -650,23 +989,66 @@ norm_row["earsnet_stethoscope_x"], norm_row["earsnet_stethoscope_y"], ) = earsnet_coords - - for key in prev_values: - prev_values[key] = ( - row.get(f"{key}_stethoscope_x", prev_values[key][0]), - row.get(f"{key}_stethoscope_y", prev_values[key][1]), + end_time_enet = time.time() + timings["earsnet_single"].append( + end_time_enet - start_time_enet ) + prev_values["earsnet"] = earsnet_coords writer.writerow(row) norm_writer.writerow(norm_row) - print(f"Processed and saved results to: {csv_path}") - print(f"Processed and saved normalized results to: {normalized_csv_path}") + print(f"Processed and saved results to: {csvfile_path}") + print(f"Processed and saved normalized results to: {normfile_path}") - generate_visualizations(csv_path, base_dir, results_dir) + generate_visualizations(csvfile_path, base_dir, results_dir) else: print("No data to write to CSV.") + # ======================================================================== + # (6) FPS計算 & CSV保存 (サブコンポーネント&パイプラインごとの合計/平均) + # ======================================================================== + fps_data = [] + for method_name, time_list in timings.items(): + if not time_list: + 
continue + total_time = sum(time_list) + num_calls = len(time_list) + avg_time = total_time / num_calls if num_calls > 0 else 0 + fps = 1.0 / avg_time if avg_time > 0 else 0 + fps_data.append( + { + "method_name": method_name, + "num_calls": num_calls, + "total_time_sec": f"{total_time:.6f}", + "avg_time_sec": f"{avg_time:.6f}", + "fps": f"{fps:.2f}", + } + ) + + fps_csv_path = os.path.join(results_dir, "fps_results.csv") + with open(fps_csv_path, "w", newline="") as f: + writer = csv.DictWriter( + f, + fieldnames=[ + "method_name", + "num_calls", + "total_time_sec", + "avg_time_sec", + "fps", + ], + ) + writer.writeheader() + for row in fps_data: + writer.writerow(row) + + print("\n===== FPS Results (subcomponent & pipeline) =====") + for row in fps_data: + print( + f"{row['method_name']}: calls={row['num_calls']}, total={row['total_time_sec']}s, " + f"avg={row['avg_time_sec']}s, FPS={row['fps']}" + ) + def generate_visualizations(csv_path, original_images_dir, results_dir): df = pd.read_csv(csv_path) @@ -722,13 +1104,14 @@ "right_hip", "stethoscope", ]: - cv2.circle( - original_image, - (int(row[f"{point}_x"]), int(row[f"{point}_y"])), - 10, - (255, 255, 0), - -1, - ) + if point + "_x" in row and point + "_y" in row: + cv2.circle( + original_image, + (int(row[f"{point}_x"]), int(row[f"{point}_y"])), + 10, + (255, 255, 0), + -1, + ) cv2.imwrite( os.path.join(results_dir, "marked_images", row["image_file_name"]), @@ -739,9 +1122,6 @@ combined_image_without_traj = body_image.copy() for key in points: - if key == "combined": - continue - if f"{key}_stethoscope_x" not in row: continue @@ -754,10 +1134,12 @@ image_with_trajectory, [np.array(points[key])], False, - colors[key], + colors.get(key, (0, 0, 255)), 2, ) - cv2.circle(image_with_trajectory, (x, y), 10, colors[key], -1) + cv2.circle( + image_with_trajectory, (x, y), 10, colors.get(key, (0, 0, 255)), -1 + ) cv2.imwrite( os.path.join( results_dir, f"{dirs[key]}_with_trajectory", row["image_file_name"] @@ -770,14 
+1152,24 @@ combined_image_with_traj, [np.array(points[key])], False, - colors[key], + colors.get(key, (0, 0, 255)), 2, ) - cv2.circle(combined_image_with_traj, (x, y), 10, colors[key], -1) - cv2.circle(combined_image_without_traj, (x, y), 10, colors[key], -1) + cv2.circle( + combined_image_with_traj, (x, y), 10, colors.get(key, (0, 0, 255)), -1 + ) + cv2.circle( + combined_image_without_traj, + (x, y), + 10, + colors.get(key, (0, 0, 255)), + -1, + ) image_without_trajectory = body_image.copy() - cv2.circle(image_without_trajectory, (x, y), 10, colors[key], -1) + cv2.circle( + image_without_trajectory, (x, y), 10, colors.get(key, (0, 0, 255)), -1 + ) cv2.imwrite( os.path.join( results_dir, @@ -865,9 +1257,21 @@ os.makedirs(args.output_dir, exist_ok=True) + # ------------------------- + # 1) FPSモニタ用スレッド開始 + # ------------------------- + fps_thread = Thread(target=fps_monitor, args=(1.0,), daemon=True) + fps_thread.start() + + # ------------------------- + # 2) 動画をフレームに分割 + # ------------------------- frames_dir = os.path.join(args.output_dir, "frames") video_to_frames(args.video_path, frames_dir) + # ------------------------- + # 3) RTMPOSE初期化 (必要に応じて) + # ------------------------- if RTMPOSE_ENABLED: detector = init_detector(det_config, det_checkpoint, device="cuda:0") detector.cfg = adapt_mmdet_pipeline(detector.cfg) @@ -881,8 +1285,25 @@ process_images(args, detector, pose_estimator, visualizer) else: + # RTMPOSE 未使用時 process_images(args, None, None, None) + # ------------------------- + # 4) スレッド終了指示・join + # ------------------------- + global stop_fps_thread + stop_fps_thread = True + fps_thread.join() + + # もし fps_history をCSV保存したい場合はここで行う + # with open("fps_history.csv", "w", newline="") as f: + # writer = csv.writer(f) + # writer.writerow(["timestamp", "fps"]) + # for timestamp, fps_value in fps_history: + # writer.writerow([timestamp, fps_value]) + + print("All done.") + if __name__ == "__main__": main()