diff --git a/config.py b/config.py
index 62d026d..4c46e69 100644
--- a/config.py
+++ b/config.py
@@ -22,6 +22,7 @@
 RTMPOSE_ENABLED = True
 MOBILENETV1SSD_ENABLED = False
 YOLOX_ENABLED = True
+EARSNET_CROP_ENABLED = True
 
 # Neural network model settings
 EARSNET_ENABLED = True
diff --git a/main.py b/main.py
index 89e0bf0..81ed270 100644
--- a/main.py
+++ b/main.py
@@ -11,7 +11,7 @@
 import pandas as pd
 from mmdet.apis import DetInferencer, inference_detector, init_detector
 
-# New imports for RTMPose
+# RTMpose
 from mmpose.apis import inference_topdown
 from mmpose.apis import init_model as init_pose_estimator
 from mmpose.evaluation.functional import nms
@@ -21,12 +21,16 @@
 
 import config
 
-# EARSNetPredictor のみをインポート
+# EARSNet
 from modules.EARSNet.predictor import EARSNetPredictor
+
+# Utilities
 from util.calc_ste_position import CalcStethoscopePosition
 from util.ears_ai import EarsAI
 
-# Get colors from config
+###############################################################################
+# Config 値を参照
+###############################################################################
 CONV_COLOR = config.CONV_COLOR
 XGBOOST_COLOR = config.XGBOOST_COLOR
 LIGHTGBM_COLOR = config.LIGHTGBM_COLOR
@@ -34,7 +38,6 @@
 CATBOOST_COLOR = config.CATBOOST_COLOR
 NGBOOST_COLOR = config.NGBOOST_COLOR
 
-# Get model execution settings
 CONV_ENABLED = config.CONV_ENABLED
 XGBOOST_ENABLED = config.XGBOOST_ENABLED
 LIGHTGBM_ENABLED = config.LIGHTGBM_ENABLED
@@ -46,16 +49,18 @@
 YOLOX_ENABLED = config.YOLOX_ENABLED
 EARSNET_ENABLED = config.EARSNET_ENABLED
 
-# Get normalization setting
+# ★ クロップ画像を使う EARSNet (別モデル) を使うかどうか
+EARSNET_CROP_ENABLED = config.EARSNET_CROP_ENABLED
+
 NORMALIZE_ENABLED = config.NORMALIZE_ENABLED
 
+DEVICE = config.DEVICE  # "cuda" or "cpu" など
+
 ###############################################################################
 # リアルタイムFPS計測用のグローバル変数＆スレッド定義
 ###############################################################################
 processed_frames = 0  # 処理済みフレーム数(メインスレッドでインクリメント)
 stop_fps_thread = False  # スレッド終了フラグ
-
-# 必要に応じてリアルタイムFPSの履歴を保存するリスト (後でCSV化したい場合)
 fps_history = []
 
 
@@ -86,16 +91,14 @@
             f"[FPS Monitor] Real-time FPS: {current_fps:.2f}  (frames: +{frames_delta})"
         )
 
-        # 履歴を残したい場合は下記を使用
         fps_history.append((now, current_fps))
 
-        # カウント更新
         last_count = current_count
         last_time = now
 
 
 ###############################################################################
-# 以下は従来の処理 (姿勢推定、聴診器検出、FPS計測など)
+# モデルロード系
 ###############################################################################
 def load_model(model_path, model_type="lgb"):
     with open(model_path, "rb") as model_file:
@@ -107,6 +110,9 @@
         return pickle.load(f)
 
 
+###############################################################################
+# YOLOX
+###############################################################################
 def init_yolox():
     try:
         from mmengine.registry import DefaultScope
@@ -116,9 +122,8 @@
         init_args = {
             "model": config.YOLOX_CONFIG_FILE,
             "weights": config.YOLOX_CHECKPOINT_FILE,
-            "device": config.DEVICE,
+            "device": DEVICE,
         }
-
         yolox_inferencer = DetInferencer(**init_args)
         return yolox_inferencer
 
@@ -140,6 +145,9 @@
 
 
 def expand_points(p1, p2):
+    """
+    2点を中央から外側に拡張(肩や腰の領域を拡大する用途)するヘルパー関数
+    """
     mid_x = (p1[0] + p2[0]) / 2
     mid_y = (p1[1] + p2[1]) / 2
 
@@ -180,7 +188,7 @@
     stethoscope_y = None
     max_score = -1
 
-    # keypoints 配列から部位を取得
+    # keypoints 配列から部位を取得 (COCOフォーマット想定)
     nose = pose_keypoints[0]
     left_shoulder = pose_keypoints[5]
     right_shoulder = pose_keypoints[6]
@@ -206,7 +214,7 @@
     for i, (label, score) in enumerate(
         zip(predictions["labels"], predictions["scores"])
     ):
-        # label=0 → 聴診器と仮定 (実際は学習クラスによって要変更)
+        # label=0 → 聴診器と仮定 (学習済みクラスのラベルに合わせる)
         if score >= score_thr and label == 0:
             bbox = predictions["bboxes"][i]
             center_x = (bbox[0] + bbox[2]) / 2
@@ -238,6 +246,9 @@
     return stethoscope_overlay_img, stethoscope_x, stethoscope_y
 
 
+###############################################################################
+# 各種座標変換
+###############################################################################
 def normalize_quadrilateral_with_point(points, extra_point):
     """4点（肩・肩・腰・腰）と任意の1点（聴診器）を正規化して返す。"""
     all_points = np.vstack([points.reshape(-1, 2), extra_point])
@@ -263,6 +274,7 @@
     )
     if max_edge_length == 0:
         return rotated_points  # 0割り防止
+
     return rotated_points / max_edge_length
 
 
@@ -289,6 +301,9 @@
     print(f"All frames saved to {output_dir}")
 
 
+###############################################################################
+# RTMpose キーポイント抽出
+###############################################################################
 def extract_keypoints_rtmpose(pose_results):
     if not pose_results:
         print("No pose results found.")
@@ -312,16 +327,48 @@
     return keypoints
 
 
+###############################################################################
+# 胴体クロップ生成
+###############################################################################
+def crop_body_from_keypoints(frame, left_shoulder, right_shoulder, left_hip, right_hip):
+    """
+    Crop the torso region spanned by the shoulder and hip keypoints.
+
+    The box is the min/max of the four (x, y) points padded by a 20 px margin
+    and clamped to the frame, and it is always kept at least 1 px wide/high:
+    keypoints lying outside the frame used to yield an empty crop (which the
+    caller cannot write with cv2.imwrite) or a wrapping negative slice bound.
+
+    Args:
+        frame: image as np.ndarray, (H, W) or (H, W, C), indexed [y, x].
+        left_shoulder, right_shoulder, left_hip, right_hip: (x, y) points in
+            frame pixel coordinates; only indices 0 and 1 are read.
+
+    Returns:
+        (cropped_frame, (xmin, ymin)): a copy of the cropped region and the
+        crop's top-left corner, for mapping crop coordinates back to the frame.
+    """
+    h, w = frame.shape[:2]
+    xs = [pt[0] for pt in (left_shoulder, right_shoulder, left_hip, right_hip)]
+    ys = [pt[1] for pt in (left_shoulder, right_shoulder, left_hip, right_hip)]
+
+    margin = 20  # padding in pixels added on every side of the keypoint box
+    # Keep the top-left inside the frame and the bottom-right strictly past it.
+    xmin = min(max(0, int(min(xs)) - margin), w - 1)
+    xmax = max(min(w, int(max(xs)) + margin), xmin + 1)
+    ymin = min(max(0, int(min(ys)) - margin), h - 1)
+    ymax = max(min(h, int(max(ys)) + margin), ymin + 1)
+    return frame[ymin:ymax, xmin:xmax].copy(), (xmin, ymin)
+
+
+###############################################################################
+# メイン処理
+###############################################################################
 def process_images(args, detector, pose_estimator, visualizer):
-    """
-    メインスレッドでフレームごとの推論を行う。
-    別スレッドでリアルタイムFPSを計測しているため、
-    フレーム処理終了後に processed_frames をインクリメントする。
-    """
-    print("Starting process_images function...")
-    global processed_frames  # 別スレッドと共有
+    global processed_frames
     ears_ai = EarsAI()
     calc_position = CalcStethoscopePosition()
+
     base_dir = os.path.join(args.output_dir, "frames")
     results_dir = os.path.join(args.output_dir, "results")
     csv_path = os.path.join(results_dir, "results.csv")
@@ -329,6 +376,10 @@
     pose_overlay_dir = os.path.join(results_dir, "pose_overlay_image")
     stethoscope_overlay_dir = os.path.join(results_dir, "stethoscope_overlay_image")
 
+    # クロップ画像を保存するディレクトリを作成
+    cropped_dir = os.path.join(results_dir, "cropped_images")
+    os.makedirs(cropped_dir, exist_ok=True)
+
     os.makedirs(results_dir, exist_ok=True)
     os.makedirs(pose_overlay_dir, exist_ok=True)
     os.makedirs(stethoscope_overlay_dir, exist_ok=True)
@@ -337,21 +388,21 @@
         [f for f in os.listdir(base_dir) if f.lower().endswith(".png")],
         key=lambda x: int(re.search(r"(\d+)", x).group(1)),
     )
-    print(f"Found {len(png_files)} PNG files.")
+    print(f"Found {len(png_files)} PNG files in {base_dir}.")
 
     rows = []
     normalized_rows = []
 
-    # ----------------
+    # ------------------------------------------
     # YOLOX 初期化
-    # ----------------
+    # ------------------------------------------
     yolox_inferencer = None
     if YOLOX_ENABLED:
         yolox_inferencer = init_yolox()
 
-    # ----------------
+    # ------------------------------------------
     # 時間計測用 dict
-    # ----------------
+    # ------------------------------------------
     timings = {
         # 単体推論
         "rtmpose_single": [],
@@ -360,26 +411,31 @@
         "lightgbm_single": [],
         "xgboost_single": [],
         "earsnet_single": [],
-        # パイプライン推論 (RTMPose+YOLOX → 各モデル)
+        "earsnet_cropped_single": [],
+        # パイプライン推論
         "pipeline_rtmpose_yolox_conv": [],
         "pipeline_rtmpose_yolox_lightgbm": [],
         "pipeline_rtmpose_yolox_xgboost": [],
-        "pipeline_earsnet": [],
+        # 今回修正
+        "pipeline_earsnet": [],  # EARSNet 単体
+        "pipeline_earsnet_cropped": [],  # RTMPose + EARSNet(クロップ)
     }
 
-    # ----------------
+    # ------------------------------------------
     # 各モデルの事前ロード
-    # ----------------
+    # ------------------------------------------
     if LIGHTGBM_ENABLED:
         lgb_model_x = load_model("./models/LightGBM/stethoscope_calc_x_best_model.pkl")
         lgb_model_y = load_model("./models/LightGBM/stethoscope_calc_y_best_model.pkl")
         lgb_scaler_x = load_scaler("./models/LightGBM/scaler-x.pkl")
         lgb_scaler_y = load_scaler("./models/LightGBM/scaler-y.pkl")
+
     if XGBOOST_ENABLED:
         xg_model_x = load_model("./models/XGBoost/stethoscope_calc_x_best_model.pkl")
         xg_model_y = load_model("./models/XGBoost/stethoscope_calc_y_best_model.pkl")
         xg_scaler_x = load_scaler("./models/XGBoost/scaler-x.pkl")
         xg_scaler_y = load_scaler("./models/XGBoost/scaler-y.pkl")
+
     if CATBOOST_ENABLED:
         catboost_model_x = load_model(
             "./models/CatBoost/stethoscope_calc_x_best_model.pkl"
@@ -387,6 +443,7 @@
         catboost_model_y = load_model(
             "./models/CatBoost/stethoscope_calc_y_best_model.pkl"
         )
+
     if NGBOOST_ENABLED:
         ngboost_model_x = load_model(
             "./models/NGBoost/stethoscope_calc_x_best_model.pkl"
@@ -395,15 +452,25 @@
             "./models/NGBoost/stethoscope_calc_y_best_model.pkl"
         )
 
-    # EARSNET を使用する場合のみ初期化
+    # 通常 EARSNet (クロップなし)
     if EARSNET_ENABLED:
         earsnet_predictor = EARSNetPredictor(
             weight_path="models/EARSNet/best_model.pth",
-            resnet_depth="18",  # 学習時と同じResNet深度
-            pretrained=True,  # 学習時の設定に合わせる
-            device="cuda",  # or "cpu"
+            resnet_depth="18",
+            pretrained=True,
+            device=DEVICE,
         )
 
+    # クロップ画像用 EARSNet (別モデル)
+    if EARSNET_CROP_ENABLED:
+        earsnet_cropped_predictor = EARSNetPredictor(
+            weight_path="models/EARSNet/crop/best_model.pth",  # 想定モデルファイル
+            resnet_depth="18",
+            pretrained=True,
+            device=DEVICE,
+        )
+
+    # CSVで使用する列
     input_columns = [
         "left_shoulder_x",
         "left_shoulder_y",
@@ -427,37 +494,15 @@
             print(f"Failed to load image: {image_path}")
             continue
 
-        # (A) -- 姿勢推定(RTMPose or PoseNet) & YOLOX 推論までの時間測定の準備
-        pipeline_detection_start = time.time()
-
-        # ============================================================
-        # (1) PoseNet or RTMPOSE による姿勢推定（肩・腰座標取得）
-        # ============================================================
-        left_shoulder = (0, 0)
-        right_shoulder = (0, 0)
-        left_hip = (0, 0)
-        right_hip = (0, 0)
-        pose_overlay_img = frame.copy()
-
-        if POSENET_ENABLED:
-            # ▼ PoseNet
-            start_time_pose = time.time()
-            pose_overlay_img, *landmarks = ears_ai.pose_detect(frame, None)
-            end_time_pose = time.time()
-            timings["rtmpose_single"].append(end_time_pose - start_time_pose)
-
-            left_shoulder = landmarks[0]
-            right_shoulder = landmarks[1]
-            left_hip = landmarks[2]
-            right_hip = landmarks[3]
-
-        elif RTMPOSE_ENABLED:
-            # ▼ RTMPOSE
+        # (A) RTMPose
+        rtmpose_time = 0.0
+        if RTMPOSE_ENABLED:
+            start_time_rtmpose = time.time()
+            # ===== RTMpose推論 =====
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-            start_time_pose = time.time()
             det_result = inference_detector(detector, frame_rgb)
             pred_instance = det_result.pred_instances.cpu().numpy()
+
             bboxes = np.concatenate(
                 (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1
             )
@@ -466,24 +511,20 @@
                 np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3)
             ]
             bboxes = bboxes[nms(bboxes, 0.3), :4]
+
             pose_results = inference_topdown(pose_estimator, frame_rgb, bboxes)
             data_samples = merge_data_samples(pose_results)
             pose_keypoints = extract_keypoints_rtmpose(pose_results)
-            end_time_pose = time.time()
 
-            timings["rtmpose_single"].append(end_time_pose - start_time_pose)
+            end_time_rtmpose = time.time()
+            rtmpose_time = end_time_rtmpose - start_time_rtmpose
+            timings["rtmpose_single"].append(rtmpose_time)
 
             if pose_keypoints is None:
                 print(f"Failed to extract keypoints for image: {image_path}")
                 processed_frames += 1
                 continue
 
-            left_shoulder = pose_keypoints[5]
-            right_shoulder = pose_keypoints[6]
-            left_hip = pose_keypoints[11]
-            right_hip = pose_keypoints[12]
-
-            # 可視化
             if visualizer is not None:
                 visualizer.add_datasample(
                     "result",
@@ -499,21 +540,45 @@
                     kpt_thr=0.3,
                 )
             pose_overlay_img = visualizer.get_image()  # (RGB)
-
-        # ============================================================
-        # (2) YOLOX or SSD で聴診器の推定（必要に応じて）
-        # ============================================================
-        stethoscope_overlay_img = frame.copy()
-        stethoscope_x = 0
-        stethoscope_y = 0
-
-        if MobileNetV1SSD_ENABLED:
-            start_time_ssd = time.time()
-            stethoscope_overlay_img, stethoscope_x, stethoscope_y = ears_ai.ssd_detect(
-                frame, None
+            pose_overlay_bgr = cv2.cvtColor(pose_overlay_img, cv2.COLOR_RGB2BGR)
+            cv2.imwrite(
+                os.path.join(pose_overlay_dir, image_file_name), pose_overlay_bgr
             )
-            end_time_ssd = time.time()
 
+            # COCOフォーマットのキーポイントを取り出す
+            left_shoulder = (pose_keypoints[5][0], pose_keypoints[5][1])
+            right_shoulder = (pose_keypoints[6][0], pose_keypoints[6][1])
+            left_hip = (pose_keypoints[11][0], pose_keypoints[11][1])
+            right_hip = (pose_keypoints[12][0], pose_keypoints[12][1])
+
+        elif POSENET_ENABLED:
+            # 既存 PoseNet
+            start_time_rtmpose = time.time()
+            pose_overlay_img, *landmarks = ears_ai.pose_detect(frame, None)
+            end_time_rtmpose = time.time()
+            rtmpose_time = end_time_rtmpose - start_time_rtmpose
+            timings["rtmpose_single"].append(rtmpose_time)
+
+            # landmarks = [left_shoulder, right_shoulder, left_hip, right_hip]
+            left_shoulder = landmarks[0]
+            right_shoulder = landmarks[1]
+            left_hip = landmarks[2]
+            right_hip = landmarks[3]
+
+            # pose_overlay_img はすでに BGR 形式想定
+            cv2.imwrite(
+                os.path.join(pose_overlay_dir, image_file_name), pose_overlay_img
+            )
+        else:
+            # RTMPose/PoseNet どちらも有効でない場合
+            left_shoulder = (0, 0)
+            right_shoulder = (0, 0)
+            left_hip = (0, 0)
+            right_hip = (0, 0)
+
+        # (B) YOLOX (必要なら)
+        yolox_time = 0.0
+        stethoscope_x, stethoscope_y = 0, 0
         if YOLOX_ENABLED:
             if (
                 RTMPOSE_ENABLED
@@ -521,114 +586,119 @@
                 and pose_keypoints is not None
             ):
                 start_time_yolox = time.time()
-                (
-                    stethoscope_overlay_img,
-                    stethoscope_x,
-                    stethoscope_y,
-                ) = yolox_detector_inference(frame, yolox_inferencer, pose_keypoints)
+                stethoscope_overlay_img, stethoscope_x, stethoscope_y = (
+                    yolox_detector_inference(frame, yolox_inferencer, pose_keypoints)
+                )
                 end_time_yolox = time.time()
-                timings["yolox_single"].append(end_time_yolox - start_time_yolox)
+                yolox_time = end_time_yolox - start_time_yolox
+                timings["yolox_single"].append(yolox_time)
+
+                # 可視化
+                cv2.imwrite(
+                    os.path.join(stethoscope_overlay_dir, image_file_name),
+                    stethoscope_overlay_img,
+                )
 
             elif POSENET_ENABLED:
+                # PoseNet 用のキー配列に変換してYOLOX
                 pose_keypoints_pose_net = [[0, 0]] * 13
-                pose_keypoints_pose_net[5] = left_shoulder
-                pose_keypoints_pose_net[6] = right_shoulder
-                pose_keypoints_pose_net[11] = left_hip
-                pose_keypoints_pose_net[12] = right_hip
+                pose_keypoints_pose_net[5] = (left_shoulder[0], left_shoulder[1])
+                pose_keypoints_pose_net[6] = (right_shoulder[0], right_shoulder[1])
+                pose_keypoints_pose_net[11] = (left_hip[0], left_hip[1])
+                pose_keypoints_pose_net[12] = (right_hip[0], right_hip[1])
 
                 start_time_yolox = time.time()
-                (
-                    stethoscope_overlay_img,
-                    stethoscope_x,
-                    stethoscope_y,
-                ) = yolox_detector_inference(
-                    frame, yolox_inferencer, pose_keypoints_pose_net
+                stethoscope_overlay_img, stethoscope_x, stethoscope_y = (
+                    yolox_detector_inference(
+                        frame, yolox_inferencer, pose_keypoints_pose_net
+                    )
                 )
                 end_time_yolox = time.time()
-                timings["yolox_single"].append(end_time_yolox - start_time_yolox)
+                yolox_time = end_time_yolox - start_time_yolox
+                timings["yolox_single"].append(yolox_time)
 
-        # (A') -- RTMPose + YOLOX の検出処理終了時刻 (パイプライン計測用)
-        pipeline_detection_end = time.time()
-        detection_time = pipeline_detection_end - pipeline_detection_start
-
-        # 可視化結果を保存
-        if (RTMPOSE_ENABLED or POSENET_ENABLED) and (
-            YOLOX_ENABLED or MobileNetV1SSD_ENABLED
-        ):
-            if RTMPOSE_ENABLED:
+                # 可視化
                 cv2.imwrite(
-                    os.path.join(pose_overlay_dir, image_file_name),
-                    cv2.cvtColor(pose_overlay_img, cv2.COLOR_RGB2BGR),
-                )
-            else:
-                cv2.imwrite(
-                    os.path.join(pose_overlay_dir, image_file_name),
-                    pose_overlay_img,
+                    os.path.join(stethoscope_overlay_dir, image_file_name),
+                    stethoscope_overlay_img,
                 )
 
-            cv2.imwrite(
-                os.path.join(stethoscope_overlay_dir, image_file_name),
-                stethoscope_overlay_img,
-            )
+        # ここで、(RTMPose + YOLOX) の合計検出時間をパイプラインに使う場合あり
+        detection_time_rtmpose_yolox = rtmpose_time + yolox_time
 
-        # ============================================================
-        # (3) CSV用に肩・腰・聴診器座標をまとめる
-        # ============================================================
-        if POSENET_ENABLED:
-            row = {
-                "image_file_name": image_file_name,
-                "left_shoulder_x": left_shoulder[1],
-                "left_shoulder_y": left_shoulder[0],
-                "right_shoulder_x": right_shoulder[1],
-                "right_shoulder_y": right_shoulder[0],
-                "left_hip_x": left_hip[1],
-                "left_hip_y": left_hip[0],
-                "right_hip_x": right_hip[1],
-                "right_hip_y": right_hip[0],
-                "stethoscope_x": stethoscope_x,
-                "stethoscope_y": stethoscope_y,
-            }
-        else:
-            row = {
-                "image_file_name": image_file_name,
-                "left_shoulder_x": left_shoulder[0],
-                "left_shoulder_y": left_shoulder[1],
-                "right_shoulder_x": right_shoulder[0],
-                "right_shoulder_y": right_shoulder[1],
-                "left_hip_x": left_hip[0],
-                "left_hip_y": left_hip[1],
-                "right_hip_x": right_hip[0],
-                "right_hip_y": right_hip[1],
-                "stethoscope_x": stethoscope_x,
-                "stethoscope_y": stethoscope_y,
-            }
+        # CSV用に座標をまとめる
+        row = {
+            "image_file_name": image_file_name,
+            "left_shoulder_x": left_shoulder[0],
+            "left_shoulder_y": left_shoulder[1],
+            "right_shoulder_x": right_shoulder[0],
+            "right_shoulder_y": right_shoulder[1],
+            "left_hip_x": left_hip[0],
+            "left_hip_y": left_hip[1],
+            "right_hip_x": right_hip[0],
+            "right_hip_y": right_hip[1],
+            "stethoscope_x": stethoscope_x,
+            "stethoscope_y": stethoscope_y,
+        }
 
-        # (C) EARSNET
+        # (C) EARSNet 単体
+        #  -> pipeline_earsnet は RTMPose, YOLOX を含まない
         if EARSNET_ENABLED:
-            start_earsnet = time.time()
+            start_time_earsnet = time.time()
             earsnet_x, earsnet_y = earsnet_predictor.predict(image_path)
-            end_earsnet = time.time()
-            timings["earsnet_single"].append(end_earsnet - start_earsnet)
+            end_time_earsnet = time.time()
+
+            earsnet_time = end_time_earsnet - start_time_earsnet
+            timings["earsnet_single"].append(earsnet_time)
+
+            # pipeline_earsnet = earsnet単体時間
+            timings["pipeline_earsnet"].append(earsnet_time)
 
             row["earsnet_stethoscope_x"] = earsnet_x
             row["earsnet_stethoscope_y"] = earsnet_y
 
-            # EARSNETパイプライン時間 (単体処理として計測しておく)
-            pipeline_earsnet_time = end_earsnet - start_earsnet
-            timings["pipeline_earsnet"].append(pipeline_earsnet_time)
+        # (D) クロップ画像 EARSNet ( RTMPose + EARSNet_Cropped )
+        if EARSNET_CROP_ENABLED:
+            # 1) クロップ生成
+            cropped_img, (crop_xmin, crop_ymin) = crop_body_from_keypoints(
+                frame, left_shoulder, right_shoulder, left_hip, right_hip
+            )
+            # クロップ画像を保存（確認用）
+            cropped_filename = os.path.splitext(image_file_name)[0] + "_cropped.png"
+            cv2.imwrite(os.path.join(cropped_dir, cropped_filename), cropped_img)
 
-        rows.append(row)
+            # 2) EARSNet (クロップ版)
+            start_time_earsnet_cropped = time.time()
+            earsnet_cropped_x, earsnet_cropped_y = earsnet_cropped_predictor.predict(
+                os.path.join(cropped_dir, cropped_filename)
+            )
+            end_time_earsnet_cropped = time.time()
 
-        # ============================================================
-        # (4) 正規化処理 (4点＋聴診器)
-        # ============================================================
+            earsnet_cropped_time = end_time_earsnet_cropped - start_time_earsnet_cropped
+            timings["earsnet_cropped_single"].append(earsnet_cropped_time)
+
+            # pipeline_earsnet_cropped = RTMPose時間 + EARSNet(クロップ)
+            pipeline_earsnet_cropped_time = rtmpose_time + earsnet_cropped_time
+            timings["pipeline_earsnet_cropped"].append(pipeline_earsnet_cropped_time)
+
+            # 3) 座標を元画像に変換
+            global_x = earsnet_cropped_x
+            global_y = earsnet_cropped_y
+
+            row["earsnet_crop_stethoscope_x"] = global_x
+            row["earsnet_crop_stethoscope_y"] = global_y
+
+        # (E) 正規化
         source_points = np.array(
             [
-                [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])]
-                for pos in ["left_shoulder", "right_shoulder", "left_hip", "right_hip"]
+                [float(row["left_shoulder_x"]), float(row["left_shoulder_y"])],
+                [float(row["right_shoulder_x"]), float(row["right_shoulder_y"])],
+                [float(row["left_hip_x"]), float(row["left_hip_y"])],
+                [float(row["right_hip_x"]), float(row["right_hip_y"])],
             ],
             dtype=np.float32,
         )
+
         stethoscope_point = np.array(
             [float(row["stethoscope_x"]), float(row["stethoscope_y"])]
         )
@@ -649,17 +719,43 @@
             "stethoscope_x": normalized_points[4, 0],
             "stethoscope_y": normalized_points[4, 1],
         }
-        if EARSNET_ENABLED:
-            normalized_row["earsnet_stethoscope_x"] = row["earsnet_stethoscope_x"]
-            normalized_row["earsnet_stethoscope_y"] = row["earsnet_stethoscope_y"]
 
+        if EARSNET_ENABLED:
+            stetho_point_earsnet = np.array(
+                [
+                    float(row.get("earsnet_stethoscope_x", 0)),
+                    float(row.get("earsnet_stethoscope_y", 0)),
+                ]
+            )
+            norm_earsnet = normalize_quadrilateral_with_point(
+                source_points.flatten(), stetho_point_earsnet
+            )
+            normalized_row["earsnet_stethoscope_x"] = norm_earsnet[4, 0]
+            normalized_row["earsnet_stethoscope_y"] = norm_earsnet[4, 1]
+
+        if EARSNET_CROP_ENABLED:
+            stetho_point_crop = np.array(
+                [
+                    float(row.get("earsnet_crop_stethoscope_x", 0)),
+                    float(row.get("earsnet_crop_stethoscope_y", 0)),
+                ]
+            )
+            norm_earsnet_crop = normalize_quadrilateral_with_point(
+                source_points.flatten(), stetho_point_crop
+            )
+            normalized_row["earsnet_crop_stethoscope_x"] = norm_earsnet_crop[4, 0]
+            normalized_row["earsnet_crop_stethoscope_y"] = norm_earsnet_crop[4, 1]
+
+        rows.append(row)
         normalized_rows.append(normalized_row)
 
-        # (5) パイプライン推論 (Conv, LightGBM, XGBoost, etc.)
+        # (F) パイプライン (RTMPose+YOLOX → Conv/LightGBM/XGBoost)
+        # ここは従来通り: detection_time_rtmpose_yolox + 各モデル時間
+
         if RTMPOSE_ENABLED and YOLOX_ENABLED:
             # conv
             if CONV_ENABLED:
-                conv_start = time.time()
+                start_conv = time.time()
                 source_pts = np.array(
                     [
                         [float(row[f"{pos}_x"]), float(row[f"{pos}_y"])]
@@ -675,49 +771,52 @@
                 stetho_pt = np.array(
                     [float(row["stethoscope_x"]), float(row["stethoscope_y"])]
                 )
-                conv_stethoscope = calc_position.calc_affine(source_pts, *stetho_pt)
-                conv_end = time.time()
-                timings["conv_single"].append(conv_end - conv_start)
+                _ = calc_position.calc_affine(source_pts, *stetho_pt)
+                end_conv = time.time()
+                conv_time = end_conv - start_conv
+                timings["conv_single"].append(conv_time)
 
-                pipeline_time_conv = detection_time + (conv_end - conv_start)
-                timings["pipeline_rtmpose_yolox_conv"].append(pipeline_time_conv)
+                # pipeline_rtmpose_yolox_conv
+                timings["pipeline_rtmpose_yolox_conv"].append(
+                    detection_time_rtmpose_yolox + conv_time
+                )
 
             # XGBoost
             if XGBOOST_ENABLED:
                 xg_start = time.time()
-                input_data_xg = (
-                    pd.DataFrame([normalized_rows[-1]])
-                    if NORMALIZE_ENABLED
-                    else pd.DataFrame([rows[-1]])
-                )
+                if NORMALIZE_ENABLED:
+                    input_data_xg = pd.DataFrame([normalized_rows[-1]])
+                else:
+                    input_data_xg = pd.DataFrame([rows[-1]])
                 X_scaled_x = xg_scaler_x.transform(input_data_xg[input_columns])
-                xg_x_pred = int(xg_model_x.predict(X_scaled_x)[0])
+                _ = xg_model_x.predict(X_scaled_x)[0]
                 X_scaled_y = xg_scaler_y.transform(input_data_xg[input_columns])
-                xg_y_pred = int(xg_model_y.predict(X_scaled_y)[0])
+                _ = xg_model_y.predict(X_scaled_y)[0]
                 xg_end = time.time()
-                timings["xgboost_single"].append(xg_end - xg_start)
+                xg_time = xg_end - xg_start
+                timings["xgboost_single"].append(xg_time)
 
-                pipeline_time_xgboost = detection_time + (xg_end - xg_start)
-                timings["pipeline_rtmpose_yolox_xgboost"].append(pipeline_time_xgboost)
+                timings["pipeline_rtmpose_yolox_xgboost"].append(
+                    detection_time_rtmpose_yolox + xg_time
+                )
 
             # LightGBM
             if LIGHTGBM_ENABLED:
                 lgb_start = time.time()
-                input_data_lgb = (
-                    pd.DataFrame([normalized_rows[-1]])
-                    if NORMALIZE_ENABLED
-                    else pd.DataFrame([rows[-1]])
-                )
+                if NORMALIZE_ENABLED:
+                    input_data_lgb = pd.DataFrame([normalized_rows[-1]])
+                else:
+                    input_data_lgb = pd.DataFrame([rows[-1]])
                 X_scaled_x = lgb_scaler_x.transform(input_data_lgb[input_columns])
-                lgb_x_pred = int(lgb_model_x.predict(X_scaled_x)[0])
+                _ = lgb_model_x.predict(X_scaled_x)[0]
                 X_scaled_y = lgb_scaler_y.transform(input_data_lgb[input_columns])
-                lgb_y_pred = int(lgb_model_y.predict(X_scaled_y)[0])
+                _ = lgb_model_y.predict(X_scaled_y)[0]
                 lgb_end = time.time()
-                timings["lightgbm_single"].append(lgb_end - lgb_start)
+                lgb_time = lgb_end - lgb_start
+                timings["lightgbm_single"].append(lgb_time)
 
-                pipeline_time_lightgbm = detection_time + (lgb_end - lgb_start)
                 timings["pipeline_rtmpose_yolox_lightgbm"].append(
-                    pipeline_time_lightgbm
+                    detection_time_rtmpose_yolox + lgb_time
                 )
 
         processed_frames += 1
@@ -726,42 +825,22 @@
     # CSV 書き込み
     # ========================================================================
     if rows:
-        print(f"Writing {len(rows)} rows to CSV...")
         fieldnames = list(rows[0].keys())
-        if CONV_ENABLED:
-            fieldnames.extend(["conv_stethoscope_x", "conv_stethoscope_y"])
-        if XGBOOST_ENABLED:
-            fieldnames.extend(["Xgboost_stethoscope_x", "Xgboost_stethoscope_y"])
-        if LIGHTGBM_ENABLED:
-            fieldnames.extend(["lightGBM_stethoscope_x", "lightGBM_stethoscope_y"])
-        if CATBOOST_ENABLED:
-            fieldnames.extend(["catboost_stethoscope_x", "catboost_stethoscope_y"])
-        if NGBOOST_ENABLED:
-            fieldnames.extend(["ngboost_stethoscope_x", "ngboost_stethoscope_y"])
-
         csvfile_path = os.path.join(results_dir, "results.csv")
         normfile_path = os.path.join(results_dir, "results-convert.csv")
+
+        os.makedirs(results_dir, exist_ok=True)
+
         with (
             open(csvfile_path, "w", newline="") as csvfile,
             open(normfile_path, "w", newline="") as norm_csvfile,
         ):
             writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-            norm_writer = csv.DictWriter(norm_csvfile, fieldnames=fieldnames)
             writer.writeheader()
-            norm_writer.writeheader()
 
-            # 前回値を保持する辞書 (未検出時に使いたい場合)
-            prev_values = {}
-            if CONV_ENABLED:
-                prev_values["conv"] = (180, 180)
-            if LIGHTGBM_ENABLED:
-                prev_values["lightGBM"] = (180, 180)
-            if XGBOOST_ENABLED:
-                prev_values["Xgboost"] = (180, 180)
-            if CATBOOST_ENABLED:
-                prev_values["catboost"] = (180, 180)
-            if NGBOOST_ENABLED:
-                prev_values["ngboost"] = (180, 180)
+            norm_fieldnames = list(normalized_rows[0].keys())
+            norm_writer = csv.DictWriter(norm_csvfile, fieldnames=norm_fieldnames)
+            norm_writer.writeheader()
 
             for row_, norm_row_ in zip(rows, normalized_rows):
                 writer.writerow(row_)
@@ -770,12 +849,13 @@
         print(f"Processed and saved results to: {csvfile_path}")
         print(f"Processed and saved normalized results to: {normfile_path}")
 
+        # 可視化・動画化
         generate_visualizations(csvfile_path, base_dir, results_dir)
     else:
         print("No data to write to CSV.")
 
     # ========================================================================
-    # (6) FPS計算 & CSV保存 (サブコンポーネント＆パイプラインごとの合計/平均)
+    # FPS計算 & CSV保存 (サブコンポーネント＆パイプラインごとの合計/平均)
     # ========================================================================
     fps_data = []
     for method_name, time_list in timings.items():
@@ -819,14 +899,18 @@
         )
 
 
+###############################################################################
+# 可視化・動画化
+###############################################################################
 def generate_visualizations(csv_path, original_images_dir, results_dir):
     """
-    NaNが混入した場合に描画でエラーにならないように修正。
-    聴診器の推論結果がNaNの場合は描画をスキップする。
+    Draw the estimates saved in the CSV onto BodyF.png and build videos from them,
+    including the EARSNet crop results; estimates whose x or y is NaN are not drawn.
     """
     df = pd.read_csv(csv_path)
     body_image = cv2.imread("./images/body/BodyF.png")
 
+    # 生成ディレクトリ設定
     dirs = {"marked": "marked_images"}
     if CONV_ENABLED:
         dirs["conv"] = "conv"
@@ -840,6 +924,9 @@
         dirs["ngboost"] = "ngboost"
     if EARSNET_ENABLED:
         dirs["earsnet"] = "earsnet"
+    if EARSNET_CROP_ENABLED:
+        dirs["earsnet_crop"] = "earsnet_crop"
+
     dirs["combined"] = "combined"
 
     os.makedirs(os.path.join(results_dir, "marked_images"), exist_ok=True)
@@ -853,7 +940,10 @@
                 exist_ok=True,
             )
 
-    points = {key: [] for key in dirs.keys() if key != "marked"}
+    # 描画に使う座標列
+    points = {key: [] for key in dirs.keys() if key not in ["marked", "combined"]}
+
+    # 色設定
     colors = {
         "conv": CONV_COLOR,
         "Xgboost": XGBOOST_COLOR,
@@ -861,13 +951,13 @@
         "catboost": CATBOOST_COLOR,
         "ngboost": NGBOOST_COLOR,
         "earsnet": EARSNET_COLOR,
+        "earsnet_crop": (255, 51, 255),  # ピンク系
     }
 
     for _, row in df.iterrows():
         original_image_path = os.path.join(original_images_dir, row["image_file_name"])
         if not os.path.exists(original_image_path):
             continue
-
         original_image = cv2.imread(original_image_path)
         if original_image is None:
             continue
@@ -885,7 +975,6 @@
             if col_x in row and col_y in row:
                 val_x = row[col_x]
                 val_y = row[col_y]
-                # NaNチェック
                 if pd.isna(val_x) or pd.isna(val_y):
                     continue
                 cv2.circle(
@@ -895,46 +984,48 @@
                     (255, 255, 0),
                     -1,
                 )
-
+        # 保存
+        marked_dir = os.path.join(results_dir, "marked_images")
         cv2.imwrite(
-            os.path.join(results_dir, "marked_images", row["image_file_name"]),
+            os.path.join(marked_dir, row["image_file_name"]),
             original_image,
         )
 
-        # BodyF.png の上に軌跡を描画する
+        # BodyF.png に軌跡を描画
         combined_image_with_traj = body_image.copy()
         combined_image_without_traj = body_image.copy()
 
-        for key in points:
+        # 各推定結果を描画
+        for key in points.keys():
             col_x = f"{key}_stethoscope_x"
             col_y = f"{key}_stethoscope_y"
             if col_x not in row or col_y not in row:
                 continue
-
             val_x = row[col_x]
             val_y = row[col_y]
-            # NaNであればスキップ
             if pd.isna(val_x) or pd.isna(val_y):
                 continue
 
             x, y = int(val_x), int(val_y)
             points[key].append((x, y))
 
-            # 1) 個別 with trajectory
+            color = colors[key] if key in colors else (0, 0, 255)
+
+            # 個別 with trajectory
             image_with_trajectory = body_image.copy()
             if len(points[key]) > 1:
                 cv2.polylines(
                     image_with_trajectory,
                     [np.array(points[key])],
                     False,
-                    colors.get(key, (0, 0, 255)),
+                    color,
                     2,
                 )
             cv2.circle(
                 image_with_trajectory,
                 (x, y),
                 10,
-                colors.get(key, (0, 0, 255)),
+                color,
                 -1,
             )
             cv2.imwrite(
@@ -944,13 +1035,13 @@
                 image_with_trajectory,
             )
 
-            # 2) 個別 without trajectory
+            # 個別 without trajectory
             image_without_trajectory = body_image.copy()
             cv2.circle(
                 image_without_trajectory,
                 (x, y),
                 10,
-                colors.get(key, (0, 0, 255)),
+                color,
                 -1,
             )
             cv2.imwrite(
@@ -962,28 +1053,28 @@
                 image_without_trajectory,
             )
 
-            # 3) combined with trajectory
+            # combined with trajectory
             if len(points[key]) > 1:
                 cv2.polylines(
                     combined_image_with_traj,
                     [np.array(points[key])],
                     False,
-                    colors.get(key, (0, 0, 255)),
+                    color,
                     2,
                 )
             cv2.circle(
                 combined_image_with_traj,
                 (x, y),
                 10,
-                colors.get(key, (0, 0, 255)),
+                color,
                 -1,
             )
-            # 4) combined without trajectory
+            # combined without trajectory
             cv2.circle(
                 combined_image_without_traj,
                 (x, y),
                 10,
-                colors.get(key, (0, 0, 255)),
+                color,
                 -1,
             )
 
@@ -1015,7 +1106,7 @@
     )
 
     for key in dirs:
-        if key != "marked":
+        if key not in ["marked", "combined"]:
             create_video_from_images(
                 os.path.join(results_dir, f"{dirs[key]}_with_trajectory"),
                 os.path.join(results_dir, f"{key}_video_with_trajectory.mp4"),
@@ -1027,9 +1118,6 @@
 
 
 def create_video_from_images(image_dir, output_path):
-    """
-    指定ディレクトリ内の PNG 画像を1つの動画に変換する
-    """
     if not os.path.exists(image_dir):
         return
     images = sorted(
@@ -1042,6 +1130,9 @@
         return
 
     frame = cv2.imread(os.path.join(image_dir, images[0]))
+    if frame is None:
+        print(f"Failed to read the first image in {image_dir}")
+        return
     height, width, _ = frame.shape
 
     video = cv2.VideoWriter(
@@ -1051,7 +1142,8 @@
     for image in images:
         img_path = os.path.join(image_dir, image)
         img = cv2.imread(img_path)
-        video.write(img)
+        if img is not None:
+            video.write(img)
 
     video.release()
     print(f"Created video: {output_path}")
@@ -1069,6 +1161,8 @@
         default="output",
         help="Directory to save output images and results",
     )
+
+    # RTMpose 用の config & checkpoint (必要に応じて変更)
     det_config = "modules/rtmpose/mmdetection_cfg/rtmdet_m_640-8xb32_coco-person.py"
     det_checkpoint = (
         "models/rtmpose/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth"
@@ -1083,26 +1177,20 @@
 
     os.makedirs(args.output_dir, exist_ok=True)
 
-    # -------------------------
     # 1) FPSモニタ用スレッド開始
-    # -------------------------
     fps_thread = Thread(target=fps_monitor, args=(1.0,), daemon=True)
     fps_thread.start()
 
-    # -------------------------
     # 2) 動画をフレームに分割
-    # -------------------------
     frames_dir = os.path.join(args.output_dir, "frames")
     video_to_frames(args.video_path, frames_dir)
 
-    # -------------------------
-    # 3) RTMPOSE初期化 (必要に応じて)
-    # -------------------------
+    # 3) RTMPOSE初期化 (必要なときのみ)
     if RTMPOSE_ENABLED:
-        detector = init_detector(det_config, det_checkpoint, device="cuda:0")
+        detector = init_detector(det_config, det_checkpoint, device=DEVICE)
         detector.cfg = adapt_mmdet_pipeline(detector.cfg)
         pose_estimator = init_pose_estimator(
-            pose_config, pose_checkpoint, device="cuda:0"
+            pose_config, pose_checkpoint, device=DEVICE
         )
         visualizer = VISUALIZERS.build(pose_estimator.cfg.visualizer)
         visualizer.set_dataset_meta(
@@ -1113,9 +1201,7 @@
     else:
         process_images(args, None, None, None)
 
-    # -------------------------
     # 4) スレッド終了指示・join
-    # -------------------------
     global stop_fps_thread
     stop_fps_thread = True
     fps_thread.join()