diff --git a/Media1.mp4 b/Media1.mp4 new file mode 100644 index 0000000..83af48f --- /dev/null +++ b/Media1.mp4 Binary files differ diff --git a/Media2.mp4 b/Media2.mp4 new file mode 100644 index 0000000..90c70d6 --- /dev/null +++ b/Media2.mp4 Binary files differ diff --git a/config.py b/config.py index e20cb0a..72790e6 100644 --- a/config.py +++ b/config.py @@ -4,10 +4,12 @@ DEVICE = "cuda:0" # Colors for different models (B,G,R format) -CONV_COLOR = (0, 255, 0) -XGBOOST_COLOR = (98, 72, 16) -LIGHTGBM_COLOR = (98, 72, 16) -EARSNET_COLOR = (18, 53, 13) +CONV_COLOR = (0, 255, 0) # 純粋な緑 +XGBOOST_COLOR = (0, 165, 255) # オレンジ +LIGHTGBM_COLOR = (255, 0, 255) # マゼンタ +EARSNET_COLOR = (139, 0, 0) # ダークブルー +CATBOOST_COLOR = (0, 0, 255) # 赤 +NGBOOST_COLOR = (255, 255, 0) # シアン # Model execution settings CONV_ENABLED = True diff --git a/crop.py b/crop.py new file mode 100644 index 0000000..e2a83df --- /dev/null +++ b/crop.py @@ -0,0 +1,65 @@ +import argparse + +from moviepy.editor import VideoFileClip + + +def crop_video_time(input_path, output_path, start_time, end_time): + """ + 指定された時間範囲で動画をクロップする関数 + + Parameters: + ----------- + input_path : str + 入力動画のパス + output_path : str + 出力動画のパス + start_time : float + 開始時間(秒) + end_time : float + 終了時間(秒) + """ + try: + # 動画を読み込む + video = VideoFileClip(input_path) + + # 動画の長さをチェック + if end_time > video.duration: + print( + f"警告: 指定された終了時間({end_time}秒)が動画の長さ({video.duration}秒)を超えています" + ) + end_time = video.duration + + if start_time < 0: + print("警告: 開始時間は0秒以上である必要があります") + start_time = 0 + + if start_time >= end_time: + raise ValueError("開始時間は終了時間より小さい必要があります") + + # 指定された時間範囲で動画をクロップ + cropped_video = video.subclip(start_time, end_time) + + # 動画を保存 + cropped_video.write_videofile(output_path, codec="libx264", audio_codec="aac") + + # リソースを解放 + video.close() + cropped_video.close() + + print(f"動画のクロップが完了しました: {output_path}") + + except Exception as e: + print(f"エラーが発生しました: {str(e)}") + raise + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="動画の時間的クロップを行うスクリプト") + parser.add_argument("input_path", help="入力動画のパス") + parser.add_argument("output_path", help="出力動画のパス") + parser.add_argument("start_time", type=float, help="開始時間(秒)") + parser.add_argument("end_time", type=float, help="終了時間(秒)") + + args = parser.parse_args() + + crop_video_time(args.input_path, args.output_path, args.start_time, args.end_time) diff --git a/main.py b/main.py index 5e5f2d6..a3e5c4b 100644 --- a/main.py +++ b/main.py @@ -28,11 +28,15 @@ XGBOOST_COLOR = config.XGBOOST_COLOR LIGHTGBM_COLOR = config.LIGHTGBM_COLOR EARSNET_COLOR = config.EARSNET_COLOR +CATBOOST_COLOR = config.CATBOOST_COLOR +NGBOOST_COLOR = config.NGBOOST_COLOR # Get model execution settings CONV_ENABLED = config.CONV_ENABLED XGBOOST_ENABLED = config.XGBOOST_ENABLED LIGHTGBM_ENABLED = config.LIGHTGBM_ENABLED +CATBOOST_ENABLED = config.CATBOOST_ENABLED +NGBOOST_ENABLED = config.NGBOOST_ENABLED POSENET_ENABLED = config.POSENET_ENABLED RTMPOSE_ENABLED = config.RTMPOSE_ENABLED MobileNetV1SSD_ENABLED = config.MOBILENETV1SSD_ENABLED @@ -45,7 +49,7 @@ def init_yolox(): try: - # MMDetectionのデフォルトスコープを設定 + # Set MMDetection default scope from mmengine.registry import DefaultScope DefaultScope.get_instance("mmdet", scope_name="mmdet") @@ -66,36 +70,36 @@ def draw_polygon_and_detection(image, polygon_vertices, stethoscope_x, stethoscope_y): """ - 5角形の領域と検出された聴診器位置を描画する + Draw pentagon region and detected stethoscope position """ - # 画像のコピーを作成 + # Create a copy of the image overlay = image.copy() - # 5角形を描画 + # Draw pentagon vertices = polygon_vertices.astype(np.int32) - cv2.polylines(overlay, [vertices], True, (0, 255, 0), 2) # 緑色の線で5角形を描画 + cv2.polylines(overlay, [vertices], True, (0, 255, 0), 2) - # 検出された聴診器位置を描画(存在する場合) + # Draw detected stethoscope position (if exists) if stethoscope_x is not None and stethoscope_y is not None: center = (int(stethoscope_x), int(stethoscope_y)) - cv2.circle(overlay, center, 10, (255, 0, 0), -1) # 青色の円で検出位置を描画 - cv2.circle(overlay, center, 12, (255, 255, 255), 2) # 白い縁取り + cv2.circle(overlay, center, 10, (255, 0, 0), -1) # Blue circle for detection + cv2.circle(overlay, center, 12, (255, 255, 255), 2) # White border return overlay def expand_points(p1, p2): """ - 2点間の中点を変えずに距離を2倍に拡張する + Expand distance between two points by 2x while keeping midpoint """ mid_x = (p1[0] + p2[0]) / 2 mid_y = (p1[1] + p2[1]) / 2 - # 各点から中点へのベクトルを計算 + # Calculate vector from midpoint vec_x = p1[0] - mid_x vec_y = p1[1] - mid_y - # ベクトルを2倍に拡張 + # Expand vector by 2x new_p1 = [mid_x + vec_x * 2, mid_y + vec_y * 2] new_p2 = [mid_x - vec_x * 2, mid_y - vec_y * 2] @@ -104,7 +108,7 @@ def point_in_polygon(point, vertices): """ - 点が多角形の内部にあるかどうかを判定する + Determine if a point is inside a polygon """ x, y = point n = len(vertices) @@ -127,43 +131,43 @@ def yolox_detector_inference(frame, yolox_inferencer, pose_keypoints, score_thr=0.3): """ - YOLOXで聴診器を検出し、指定された体のポリゴン内の検出結果のみを返す + Detect stethoscope using YOLOX and return only detections within body polygon Args: - frame: 入力画像 - yolox_inferencer: YOLOXの推論モデル - pose_keypoints: RTMPoseで検出したキーポイント - score_thr: 検出スコアの閾値 + frame: Input image + yolox_inferencer: YOLOX inference model + pose_keypoints: Keypoints detected by RTMPose + score_thr: Detection score threshold Returns: - tuple: (オーバーレイ画像, 検出されたx座標, 検出されたy座標) + tuple: (overlay image, detected x coordinate, detected y coordinate) """ - # BGRからRGBに変換 + # Convert BGR to RGB frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - # 検出を実行 + # Run detection result = yolox_inferencer(inputs=frame_rgb, return_vis=True) - # 検出結果を取得 + # Get predictions predictions = result["predictions"][0] stethoscope_x = None stethoscope_y = None max_score = -1 - # キーポイントから5角形の頂点を生成 + # Generate pentagon vertices from keypoints nose = pose_keypoints[0] left_shoulder = pose_keypoints[5] right_shoulder = pose_keypoints[6] left_hip = pose_keypoints[11] right_hip = pose_keypoints[12] - # 肩と腰の座標を拡張 + # Expand shoulder and hip coordinates expanded_left_shoulder, expanded_right_shoulder = expand_points( left_shoulder, right_shoulder ) expanded_left_hip, expanded_right_hip = expand_points(left_hip, right_hip) - # 5角形の頂点を定義 + # Define pentagon vertices polygon_vertices = np.array( [ nose, @@ -174,29 +178,29 @@ ] ) - # 各検出結果に対して処理 + # Process each detection for i, (label, score) in enumerate( zip(predictions["labels"], predictions["scores"]) ): - if score >= score_thr and label == 0: # label 0 は聴診器を示す + if score >= score_thr and label == 0: # label 0 is stethoscope bbox = predictions["bboxes"][i] - # 中心座標を計算 + # Calculate center coordinates center_x = (bbox[0] + bbox[2]) / 2 center_y = (bbox[1] + bbox[3]) / 2 - # 中心点が5角形の内部にあるか確認 + # Check if center point is inside pentagon if point_in_polygon([center_x, center_y], polygon_vertices): if score > max_score: stethoscope_x = center_x stethoscope_y = center_y max_score = score - # 検出結果がない場合は0,0を返す + # Return 0,0 if no detection if stethoscope_x is None or stethoscope_y is None: stethoscope_x = 0 stethoscope_y = 0 - # 可視化結果の画像を取得してBGRに変換 + # Get visualization and convert to BGR stethoscope_overlay_img = result["visualization"][0] if ( len(stethoscope_overlay_img.shape) == 3 @@ -206,7 +210,7 @@ stethoscope_overlay_img, cv2.COLOR_RGB2BGR ) - # ポリゴンと検出位置を描画 + # Draw polygon and detection stethoscope_overlay_img = draw_polygon_and_detection( stethoscope_overlay_img, polygon_vertices, stethoscope_x, stethoscope_y ) @@ -329,7 +333,6 @@ left_hip = landmarks[2] right_hip = landmarks[3] - # process_images関数内のRTMPOSE_ENABLEDの部分を修正 if RTMPOSE_ENABLED: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) det_result = inference_detector(detector, frame) @@ -348,10 +351,6 @@ print(f"Failed to extract keypoints for image: {image_path}") continue - """ left_shoulder = pose_keypoints[5] - right_shoulder = pose_keypoints[6] - left_hip = pose_keypoints[11] - right_hip = pose_keypoints[12] テレコ確認""" left_shoulder = pose_keypoints[6] right_shoulder = pose_keypoints[5] left_hip = pose_keypoints[12] @@ -388,7 +387,6 @@ ) if RTMPOSE_ENABLED and YOLOX_ENABLED: - # RTMPoseとYOLOXの場合は既にBGRなので変換不要 cv2.imwrite( os.path.join(pose_overlay_dir, image_file_name), pose_overlay_img ) @@ -397,7 +395,6 @@ stethoscope_overlay_img, ) else: - # その他の場合は従来通り色変換を行う cv2.imwrite( os.path.join(pose_overlay_dir, image_file_name), cv2.cvtColor(pose_overlay_img, cv2.COLOR_RGB2BGR), @@ -481,6 +478,10 @@ fieldnames.extend(["Xgboost_stethoscope_x", "Xgboost_stethoscope_y"]) if LIGHTGBM_ENABLED: fieldnames.extend(["lightGBM_stethoscope_x", "lightGBM_stethoscope_y"]) + if CATBOOST_ENABLED: + fieldnames.extend(["catboost_stethoscope_x", "catboost_stethoscope_y"]) + if NGBOOST_ENABLED: + fieldnames.extend(["ngboost_stethoscope_x", "ngboost_stethoscope_y"]) if EARSNET_ENABLED: fieldnames.extend(["earsnet_stethoscope_x", "earsnet_stethoscope_y"]) @@ -498,7 +499,20 @@ xg_model_y = load_model( "./models/XGBoost/stethoscope_calc_y_best_model.pkl" ) - # Load models + if CATBOOST_ENABLED: + catboost_model_x = load_model( + "./models/CatBoost/stethoscope_calc_x_best_model.pkl" + ) + catboost_model_y = load_model( + "./models/CatBoost/stethoscope_calc_y_best_model.pkl" + ) + if NGBOOST_ENABLED: + ngboost_model_x = load_model( + "./models/NGBoost/stethoscope_calc_x_best_model.pkl" + ) + ngboost_model_y = load_model( + "./models/NGBoost/stethoscope_calc_y_best_model.pkl" + ) if EARSNET_ENABLED: earsnet_predictor = load_earsnet_model( model_path="models/EARSNet/best_model-50-F2.pth", @@ -522,6 +536,10 @@ prev_values["lightGBM"] = (180, 180) if XGBOOST_ENABLED: prev_values["Xgboost"] = (180, 180) + if CATBOOST_ENABLED: + prev_values["catboost"] = (180, 180) + if NGBOOST_ENABLED: + prev_values["ngboost"] = (180, 180) if EARSNET_ENABLED: prev_values["earsnet"] = (180, 180) @@ -605,6 +623,38 @@ norm_row["Xgboost_stethoscope_y"], ) = xg_x, xg_y + if CATBOOST_ENABLED: + catboost_x = int( + catboost_model_x.predict(input_data[input_columns])[0] + ) + catboost_y = int( + catboost_model_y.predict(input_data[input_columns])[0] + ) + row["catboost_stethoscope_x"], row["catboost_stethoscope_y"] = ( + catboost_x, + catboost_y, + ) + ( + norm_row["catboost_stethoscope_x"], + norm_row["catboost_stethoscope_y"], + ) = catboost_x, catboost_y + + if NGBOOST_ENABLED: + ngboost_x = int( + ngboost_model_x.predict(input_data[input_columns])[0] + ) + ngboost_y = int( + ngboost_model_y.predict(input_data[input_columns])[0] + ) + row["ngboost_stethoscope_x"], row["ngboost_stethoscope_y"] = ( + ngboost_x, + ngboost_y, + ) + ( + norm_row["ngboost_stethoscope_x"], + norm_row["ngboost_stethoscope_y"], + ) = ngboost_x, ngboost_y + if EARSNET_ENABLED: image_path = os.path.join(base_dir, row["image_file_name"]) earsnet_coords = predict_earsnet(earsnet_predictor, image_path) @@ -635,7 +685,7 @@ def generate_visualizations(csv_path, original_images_dir, results_dir): df = pd.read_csv(csv_path) - body_image = cv2.imread("./images/body/BodyF.png") + body_image = cv2.imread("./images/body/BodyB.png") dirs = {"marked": "marked_images"} if CONV_ENABLED: @@ -644,6 +694,10 @@ dirs["Xgboost"] = "Xgboost" if LIGHTGBM_ENABLED: dirs["lightGBM"] = "lightGBM" + if CATBOOST_ENABLED: + dirs["catboost"] = "catboost" + if NGBOOST_ENABLED: + dirs["ngboost"] = "ngboost" if EARSNET_ENABLED: dirs["earsnet"] = "earsnet" dirs["combined"] = "combined" @@ -664,6 +718,8 @@ "conv": CONV_COLOR, "Xgboost": XGBOOST_COLOR, "lightGBM": LIGHTGBM_COLOR, + "catboost": CATBOOST_COLOR, + "ngboost": NGBOOST_COLOR, "earsnet": EARSNET_COLOR, } @@ -807,10 +863,9 @@ def main(): parser = argparse.ArgumentParser(description="Process video and generate results.") - parser = argparse.ArgumentParser(description="Process video and generate results.") parser.add_argument( "--video_path", - default="./video/Test3-1.mp4", + default="./video/Media1.mp4", help="Path to the input video file", ) parser.add_argument( diff --git a/requirements.txt b/requirements.txt index 43eb7ba..060b091 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ scipy==1.9.3 numpy==1.24.0 scikit-learn==1.5.1 -matplotlib==3.9.2 \ No newline at end of file +matplotlib==3.9.2 +moviepy \ No newline at end of file diff --git a/util/calc_ste_position.py b/util/calc_ste_position.py index 63721f8..27d748a 100644 --- a/util/calc_ste_position.py +++ b/util/calc_ste_position.py @@ -1,6 +1,7 @@ import cv2 import numpy as np -import modules.util.const as const + +import util.const as const class CalcStethoscopePosition: