diff --git a/config.py b/config.py index 72790e6..62d026d 100644 --- a/config.py +++ b/config.py @@ -15,9 +15,9 @@ CONV_ENABLED = True XGBOOST_ENABLED = True LIGHTGBM_ENABLED = True -CATBOOST_ENABLED = True -NGBOOST_ENABLED = True -NORMALIZE_ENABLED = False +CATBOOST_ENABLED = False +NGBOOST_ENABLED = False +NORMALIZE_ENABLED = True POSENET_ENABLED = False RTMPOSE_ENABLED = True MOBILENETV1SSD_ENABLED = False diff --git a/main.py b/main.py index 98803c6..ad37402 100644 --- a/main.py +++ b/main.py @@ -47,9 +47,18 @@ NORMALIZE_ENABLED = config.NORMALIZE_ENABLED +def load_model(model_path, model_type="lgb"): + with open(model_path, "rb") as model_file: + return pickle.load(model_file) + + +def load_scaler(scaler_path): + with open(scaler_path, "rb") as f: + return pickle.load(f) + + def init_yolox(): try: - # Set MMDetection default scope from mmengine.registry import DefaultScope DefaultScope.get_instance("mmdet", scope_name="mmdet") @@ -69,37 +78,24 @@ def draw_polygon_and_detection(image, polygon_vertices, stethoscope_x, stethoscope_y): - """ - Draw pentagon region and detected stethoscope position - """ - # Create a copy of the image overlay = image.copy() - - # Draw pentagon vertices = polygon_vertices.astype(np.int32) cv2.polylines(overlay, [vertices], True, (0, 255, 0), 2) - # Draw detected stethoscope position (if exists) if stethoscope_x is not None and stethoscope_y is not None: center = (int(stethoscope_x), int(stethoscope_y)) - cv2.circle(overlay, center, 10, (255, 0, 0), -1) # Blue circle for detection - cv2.circle(overlay, center, 12, (255, 255, 255), 2) # White border - + cv2.circle(overlay, center, 10, (255, 0, 0), -1) + cv2.circle(overlay, center, 12, (255, 255, 255), 2) return overlay def expand_points(p1, p2): - """ - Expand distance between two points by 2x while keeping midpoint - """ mid_x = (p1[0] + p2[0]) / 2 mid_y = (p1[1] + p2[1]) / 2 - # Calculate vector from midpoint vec_x = p1[0] - mid_x vec_y = p1[1] - mid_y - # Expand vector by 2x new_p1 
= [mid_x + vec_x * 2, mid_y + vec_y * 2] new_p2 = [mid_x - vec_x * 2, mid_y - vec_y * 2] @@ -107,9 +103,6 @@ def point_in_polygon(point, vertices): - """ - Determine if a point is inside a polygon - """ x, y = point n = len(vertices) inside = False @@ -130,44 +123,24 @@ def yolox_detector_inference(frame, yolox_inferencer, pose_keypoints, score_thr=0.3): - """ - Detect stethoscope using YOLOX and return only detections within body polygon - - Args: - frame: Input image - yolox_inferencer: YOLOX inference model - pose_keypoints: Keypoints detected by RTMPose - score_thr: Detection score threshold - - Returns: - tuple: (overlay image, detected x coordinate, detected y coordinate) - """ - # Convert BGR to RGB frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - - # Run detection result = yolox_inferencer(inputs=frame_rgb, return_vis=True) - - # Get predictions predictions = result["predictions"][0] stethoscope_x = None stethoscope_y = None max_score = -1 - # Generate pentagon vertices from keypoints nose = pose_keypoints[0] left_shoulder = pose_keypoints[5] right_shoulder = pose_keypoints[6] left_hip = pose_keypoints[11] right_hip = pose_keypoints[12] - # Expand shoulder and hip coordinates expanded_left_shoulder, expanded_right_shoulder = expand_points( left_shoulder, right_shoulder ) expanded_left_hip, expanded_right_hip = expand_points(left_hip, right_hip) - # Define pentagon vertices polygon_vertices = np.array( [ nose, @@ -178,29 +151,24 @@ ] ) - # Process each detection for i, (label, score) in enumerate( zip(predictions["labels"], predictions["scores"]) ): if score >= score_thr and label == 0: # label 0 is stethoscope bbox = predictions["bboxes"][i] - # Calculate center coordinates center_x = (bbox[0] + bbox[2]) / 2 center_y = (bbox[1] + bbox[3]) / 2 - # Check if center point is inside pentagon if point_in_polygon([center_x, center_y], polygon_vertices): if score > max_score: stethoscope_x = center_x stethoscope_y = center_y max_score = score - # Return 0,0 if 
no detection if stethoscope_x is None or stethoscope_y is None: stethoscope_x = 0 stethoscope_y = 0 - # Get visualization and convert to BGR stethoscope_overlay_img = result["visualization"][0] if ( len(stethoscope_overlay_img.shape) == 3 @@ -210,7 +178,6 @@ stethoscope_overlay_img, cv2.COLOR_RGB2BGR ) - # Draw polygon and detection stethoscope_overlay_img = draw_polygon_and_detection( stethoscope_overlay_img, polygon_vertices, stethoscope_x, stethoscope_y ) @@ -218,11 +185,6 @@ return stethoscope_overlay_img, stethoscope_x, stethoscope_y -def load_model(model_path, model_type="lgb"): - with open(model_path, "rb") as model_file: - return pickle.load(model_file) - - def normalize_quadrilateral_with_point(points, extra_point): all_points = np.vstack([points.reshape(-1, 2), extra_point]) center = np.mean(points.reshape(-1, 2), axis=0) @@ -351,10 +313,10 @@ print(f"Failed to extract keypoints for image: {image_path}") continue - left_shoulder = pose_keypoints[6] - right_shoulder = pose_keypoints[5] - left_hip = pose_keypoints[12] - right_hip = pose_keypoints[11] + left_shoulder = pose_keypoints[5] + right_shoulder = pose_keypoints[6] + left_hip = pose_keypoints[11] + right_hip = pose_keypoints[12] if visualizer is not None: visualizer.add_datasample( @@ -485,6 +447,7 @@ if EARSNET_ENABLED: fieldnames.extend(["earsnet_stethoscope_x", "earsnet_stethoscope_y"]) + # ★ Change: load separate scalers for the X and Y coordinates if LIGHTGBM_ENABLED: lgb_model_x = load_model( "./models/LightGBM/stethoscope_calc_x_best_model.pkl" @@ -492,6 +455,12 @@ lgb_model_y = load_model( "./models/LightGBM/stethoscope_calc_y_best_model.pkl" ) + lgb_scaler_x = load_scaler( + "./models/LightGBM/scaler-x.pkl" + ) # scaler for the X coordinate + lgb_scaler_y = load_scaler( + "./models/LightGBM/scaler-y.pkl" + ) # scaler for the Y coordinate if XGBOOST_ENABLED: xg_model_x = load_model( "./models/XGBoost/stethoscope_calc_x_best_model.pkl" @@ -499,6 +468,8 @@ xg_model_y = load_model( 
"./models/XGBoost/stethoscope_calc_y_best_model.pkl" ) + xg_scaler_x = 
load_scaler("./models/XGBoost/scaler-x.pkl") # scaler for the X coordinate + xg_scaler_y = load_scaler("./models/XGBoost/scaler-y.pkl") # scaler for the Y coordinate if CATBOOST_ENABLED: catboost_model_x = load_model( "./models/CatBoost/stethoscope_calc_x_best_model.pkl" @@ -520,6 +491,19 @@ model_version="50", ) + input_columns = [ + "left_shoulder_x", + "left_shoulder_y", + "right_shoulder_x", + "right_shoulder_y", + "left_hip_x", + "left_hip_y", + "right_hip_x", + "right_hip_y", + "stethoscope_x", + "stethoscope_y", + ] + with ( open(csv_path, "w", newline="") as csvfile, open(normalized_csv_path, "w", newline="") as norm_csvfile, @@ -544,23 +528,12 @@ prev_values["earsnet"] = (180, 180) for i, (row, norm_row) in enumerate(zip(rows, normalized_rows)): - source_points = np.array( - [ - [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])] - for pos in [ - "left_shoulder", - "right_shoulder", - "left_hip", - "right_hip", - ] - ], - dtype=np.float32, - ) - stethoscope_point = np.array( - [float(row["stethoscope_x"]), float(row["stethoscope_y"])] - ) + if NORMALIZE_ENABLED: + input_data = pd.DataFrame([norm_row]) + else: + input_data = pd.DataFrame([row]) - if stethoscope_point[0] == 0 and stethoscope_point[1] == 0: + if row["stethoscope_x"] == 0 and row["stethoscope_y"] == 0: # NOTE(review): the removed check compared float(...) values; if row holds strings this == 0 test never matches - confirm row value types for key in prev_values: row[f"{key}_stethoscope_x"], row[f"{key}_stethoscope_y"] = ( prev_values[key] @@ -570,7 +543,23 @@ norm_row[f"{key}_stethoscope_y"], ) = prev_values[key] else: + # conv if CONV_ENABLED: + source_points = np.array( + [ + [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])] + for pos in [ + "left_shoulder", + "right_shoulder", + "left_hip", + "right_hip", + ] + ], + dtype=np.float32, + ) + stethoscope_point = np.array( + [float(row["stethoscope_x"]), float(row["stethoscope_y"])] + ) conv_stethoscope = calc_position.calc_affine( source_points, *stethoscope_point ) @@ -582,48 +571,42 @@ norm_row["conv_stethoscope_y"], ) = conv_stethoscope - if NORMALIZE_ENABLED: - input_data = pd.DataFrame([norm_row]) - else: - 
input_data = pd.DataFrame([row]) - - input_columns = [ - f"{pos}_{coord}" - for pos in [ - "left_shoulder", - "right_shoulder", - "left_hip", - "right_hip", - "stethoscope", - ] - for coord in ["x", "y"] - ] - + # LightGBM prediction if LIGHTGBM_ENABLED: - lgb_x = int(lgb_model_x.predict(input_data[input_columns])[0]) - lgb_y = int(lgb_model_y.predict(input_data[input_columns])[0]) + # ★ Standardize with the X-coordinate scaler, then predict with the X model + X_scaled_x = lgb_scaler_x.transform(input_data[input_columns]) + lgb_x_pred = int(lgb_model_x.predict(X_scaled_x)[0]) + # ★ Standardize with the Y-coordinate scaler, then predict with the Y model + X_scaled_y = lgb_scaler_y.transform(input_data[input_columns]) + lgb_y_pred = int(lgb_model_y.predict(X_scaled_y)[0]) row["lightGBM_stethoscope_x"], row["lightGBM_stethoscope_y"] = ( - lgb_x, - lgb_y, + lgb_x_pred, + lgb_y_pred, ) ( norm_row["lightGBM_stethoscope_x"], norm_row["lightGBM_stethoscope_y"], - ) = lgb_x, lgb_y + ) = (lgb_x_pred, lgb_y_pred) + # XGBoost prediction if XGBOOST_ENABLED: - xg_x = int(xg_model_x.predict(input_data[input_columns])[0]) - xg_y = int(xg_model_y.predict(input_data[input_columns])[0]) + # ★ Apply separate scalers for XGBoost in the same way + X_scaled_x = xg_scaler_x.transform(input_data[input_columns]) + xg_x_pred = int(xg_model_x.predict(X_scaled_x)[0]) + X_scaled_y = xg_scaler_y.transform(input_data[input_columns]) + xg_y_pred = int(xg_model_y.predict(X_scaled_y)[0]) row["Xgboost_stethoscope_x"], row["Xgboost_stethoscope_y"] = ( - xg_x, - xg_y, + xg_x_pred, + xg_y_pred, ) ( norm_row["Xgboost_stethoscope_x"], norm_row["Xgboost_stethoscope_y"], - ) = xg_x, xg_y + ) = (xg_x_pred, xg_y_pred) + # CatBoost prediction if CATBOOST_ENABLED: + # CatBoost is left as-is as an example without a scaler (separate scalers can be added the same way if needed) catboost_x = int( catboost_model_x.predict(input_data[input_columns])[0] ) @@ -637,8 +620,9 @@ ( norm_row["catboost_stethoscope_x"], norm_row["catboost_stethoscope_y"], - ) = catboost_x, catboost_y + ) = (catboost_x, catboost_y) + # NGBoost prediction if NGBOOST_ENABLED: ngboost_x = int( 
ngboost_model_x.predict(input_data[input_columns])[0] @@ -653,8 +637,9 @@ ( norm_row["ngboost_stethoscope_x"], norm_row["ngboost_stethoscope_y"], - ) = ngboost_x, ngboost_y + ) = (ngboost_x, ngboost_y) + # EARSNET prediction if EARSNET_ENABLED: image_path = os.path.join(base_dir, row["image_file_name"]) earsnet_coords = predict_earsnet(earsnet_predictor, image_path) @@ -668,8 +653,8 @@ for key in prev_values: prev_values[key] = ( - row[f"{key}_stethoscope_x"], - row[f"{key}_stethoscope_y"], + row.get(f"{key}_stethoscope_x", prev_values[key][0]), + row.get(f"{key}_stethoscope_y", prev_values[key][1]), ) writer.writerow(row) @@ -750,7 +735,6 @@ original_image, ) - # Combined trajectoryのための画像 combined_image_with_traj = body_image.copy() combined_image_without_traj = body_image.copy() @@ -764,7 +748,6 @@ x, y = int(row[f"{key}_stethoscope_x"]), int(row[f"{key}_stethoscope_y"]) points[key].append((x, y)) - # 個別の手法の軌跡 image_with_trajectory = body_image.copy() if len(points[key]) > 1: cv2.polylines( @@ -782,7 +765,6 @@ image_with_trajectory, ) - # Combined imageに軌跡を追加 if len(points[key]) > 1: cv2.polylines( combined_image_with_traj, @@ -805,7 +787,6 @@ image_without_trajectory, ) - # Combined imageを保存 cv2.imwrite( os.path.join( results_dir, "combined_with_trajectory", row["image_file_name"]