ivus-complication-annotation-tool/utils/excel.py at 8168a1d7f8a784a730b0225eecb5eb78d4eece8f

Fork: 0
kaneko / ivus-complication-annotation-tool
Find file
Newer
Older
ivus-complication-annotation-tool / utils / excel.py
keishi (Keishi Kaneko) on 8 Dec 8 KB 実装完了 ver1
Raw Blame History
import os
import re
import pandas as pd
from typing import Dict, Optional, Tuple, Union


class Excel:
    """Tag IVUS frame image files with a complication flag read from a case-list workbook.

    The workbook is read with ``pandas.read_excel``. Image files are expected
    under ``<root_dir>/<case>/images/`` and to be named
    ``<prefix>_<case number>_<frame>.<ext>`` (for example ``frame_49_4020.png``).
    A labelled file carries a trailing ``_T`` (complication) or ``_F``
    (no complication) before the extension, e.g. ``frame_49_4020_T.png``.
    """

    # Tokens used in file names to mark the complication flag.
    _FLAG_TOKENS = ("T", "F")

    def __init__(self, exel_path):
        """Remember the workbook location; nothing is read until it is needed.

        Args:
            exel_path: Path of the Excel case list.
        """
        self.path = exel_path

    def extract_data(self):
        """Build a mapping from case number to complication flag.

        A row is used only when the "No", "No/Slow flow during procedure" and
        "IVUS解析除外" cells are all non-empty. A flow value of 1 maps to
        ``True``, 0 maps to ``False``; any other value leaves the case out.

        Returns:
            dict[int, bool]: case number -> ``True`` if the complication occurred.
        """
        df = pd.read_excel(self.path)
        flow_column = "No/Slow flow during procedure"

        # key: case number, value: True = complication, False = no complication
        data = {}

        for _, row in df.iterrows():
            # NOTE(review): rows with an empty "IVUS解析除外" cell are skipped;
            # confirm that this is the intended direction of the filter.
            if pd.isna(row["No"]) or pd.isna(row[flow_column]) or pd.isna(row["IVUS解析除外"]):
                continue

            case_num = int(row["No"])
            flow = row[flow_column]
            if flow == 1:
                data[case_num] = True
            elif flow == 0:
                data[case_num] = False

        return data

    @staticmethod
    def _iter_image_files(root_dir):
        """Yield ``(images_dir, file_name)`` for every entry of each ``<case>/images`` folder.

        Case folders without an ``images`` subfolder are skipped. Each folder
        listing is taken before its entries are yielded, so renaming files
        while iterating does not disturb the walk.
        """
        for case in os.listdir(root_dir):
            images_dir = os.path.join(root_dir, case, "images")
            if not os.path.isdir(images_dir):
                continue
            for image in os.listdir(images_dir):
                yield images_dir, image

    @staticmethod
    def _parse_case_number(parts):
        """Return the case number held in ``parts[1]``, or ``None`` if it is absent or not numeric."""
        if len(parts) < 2:
            return None
        try:
            # The number may be written as "49" or "49.0".
            return int(float(parts[1]))
        except (ValueError, OverflowError):
            return None

    def rename(self, root_dir):
        """Append ``_T`` / ``_F`` to every unlabelled image according to the workbook.

        Example: ``frame_49_4020.png`` becomes ``frame_49_4020_T.png`` when
        case 49 has the complication, ``frame_49_4020_F.png`` otherwise.
        Files that are already labelled, and files whose name does not contain
        a numeric case number (e.g. ``.DS_Store``), are left untouched. Case
        numbers missing from the workbook are reported on stdout.

        Args:
            root_dir: Directory containing one sub-directory per case.
        """
        data_dict = self.extract_data()
        missing_cases = set()

        for images_dir, image in self._iter_image_files(root_dir):
            stem, ext = os.path.splitext(image)
            parts = stem.split("_")

            # Already labelled (any extension, not only .png): nothing to do.
            if parts[-1] in self._FLAG_TOKENS:
                continue

            case_num = self._parse_case_number(parts)
            if case_num is None:
                # Not an image following the naming scheme; ignore it rather
                # than aborting the whole batch.
                continue

            if case_num not in data_dict:
                missing_cases.add(case_num)
                continue

            flag = "T" if data_dict[case_num] else "F"
            new_image_name = f"{parts[0]}_{case_num}_{parts[-1]}_{flag}{ext}"
            os.rename(
                os.path.join(images_dir, image),
                os.path.join(images_dir, new_image_name),
            )

        for case_num in sorted(missing_cases):
            print(f"Case Number : {case_num} is not found in the excel file.")
        print(f"Total {len(missing_cases)} cases are not found in the excel file.")

    def improve_name(self, root_dir):
        """Move a flag found in second-to-last position to the end of the name.

        Example: ``frame_66_F_60.png`` becomes ``frame_66_60_F.png``. Names
        with fewer than four ``_``-separated fields cannot have that layout
        and are left untouched.

        Args:
            root_dir: Directory containing one sub-directory per case.
        """
        for images_dir, image in self._iter_image_files(root_dir):
            stem, ext = os.path.splitext(image)
            parts = stem.split("_")

            if len(parts) < 4 or parts[-2] not in self._FLAG_TOKENS:
                continue

            new_image_name = f"{parts[0]}_{parts[1]}_{parts[-1]}_{parts[-2]}{ext}"
            os.rename(
                os.path.join(images_dir, image),
                os.path.join(images_dir, new_image_name),
            )

    def restore_name(self, root_dir):
        """Strip the trailing ``_T`` / ``_F`` flag from labelled images.

        Example: ``frame_66_60_F.png`` becomes ``frame_66_60.png``. Only the
        first three ``_``-separated fields are kept. Names with fewer than
        four fields are left untouched.

        Args:
            root_dir: Directory containing one sub-directory per case.
        """
        for images_dir, image in self._iter_image_files(root_dir):
            stem, ext = os.path.splitext(image)
            parts = stem.split("_")    # e.g. frame_66_60_F

            if len(parts) < 4 or parts[-1] not in self._FLAG_TOKENS:
                continue

            new_image_name = f"{parts[0]}_{parts[1]}_{parts[2]}{ext}"
            os.rename(
                os.path.join(images_dir, image),
                os.path.join(images_dir, new_image_name),
            )


class DICOMTagCSVController:
    """Read-only access to the CSV file produced by the DICOM tag extraction step.

    The methods below read the columns ``case_id``, ``case_name``,
    ``pixel_spacing_row`` and ``pixel_spacing_col``.
    """

    # Captures the number that directly follows "pre" in a case name (e.g. "pre2" -> 2).
    _PRE_NUMBER_PATTERN = re.compile(r'pre(\d+)')

    def __init__(self, csv_path: str):
        """Load the CSV file immediately.

        Args:
            csv_path (str): Path of the CSV file created by the DICOM tag extraction.

        Raises:
            ValueError: If the file is missing or cannot be parsed.
        """
        self.csv_path = csv_path
        self.df = None
        self._load_csv()

    def _load_csv(self):
        """Read ``self.csv_path`` into ``self.df``.

        Raises:
            ValueError: On any failure, including a missing file; the original
                exception is attached as ``__cause__``.
        """
        try:
            if not os.path.exists(self.csv_path):
                raise FileNotFoundError(f"CSVファイルが見つかりません: {self.csv_path}")

            self.df = pd.read_csv(self.csv_path)
        except Exception as e:
            # Callers rely on ValueError; chain the cause so the root error stays visible.
            raise ValueError(f"CSVファイルの読み込みエラー: {e}") from e

    def _case_mask(self, case_id: Union[int, str]):
        """Return a boolean mask over ``self.df`` selecting the rows of ``case_id``.

        A string ``case_id`` is compared against the column rendered as text.
        Any other value is compared directly; if that matches nothing (for
        instance because the column was read as text), the text comparison is
        used as a fallback.
        """
        column = self.df['case_id']
        text_mask = column.astype(str) == str(case_id)
        if isinstance(case_id, str):
            return text_mask

        direct_mask = column == case_id
        return direct_mask if direct_mask.any() else text_mask

    @classmethod
    def _select_pre_row(cls, pre_rows):
        """Pick one row among rows whose ``case_name`` contains "pre".

        The row with the largest number after "pre" wins (the first one on a
        tie). If no name carries a number, the last row is returned.
        ``pre_rows`` must not be empty.
        """
        best_number = -1
        chosen = None
        for _, row in pre_rows.iterrows():
            found = cls._PRE_NUMBER_PATTERN.search(row['case_name'])
            if found:
                number = int(found.group(1))
                if number > best_number:
                    best_number = number
                    chosen = row
            elif best_number == -1:
                # "pre" without a number: only used while no numbered row was seen.
                chosen = row
        return chosen

    def get_pixel_spacing(self, case_id: Union[int, str]) -> Optional[Tuple[float, float]]:
        """Return the pixel spacing of the given case.

        Only rows whose ``case_name`` contains "pre" are considered; when there
        are several, the one with the largest number after "pre" is used.

        Args:
            case_id: Case number, as an integer or as text.

        Returns:
            ``(pixel_spacing_row, pixel_spacing_col)`` as floats.

        Raises:
            ValueError: If the case is unknown, has no "pre" row, has an empty
                pixel spacing, or any other error occurs while reading the table.
        """
        try:
            matching_rows = self.df[self._case_mask(case_id)]
            if matching_rows.empty:
                raise ValueError(f"症例番号 {case_id} がDICOMタグ抽出で作成されたCSVファイルに見つかりません")

            # Keep only the rows whose case name contains "pre".
            is_pre = matching_rows['case_name'].str.contains("pre", na=False)
            pre_rows = matching_rows[is_pre]
            if pre_rows.empty:
                raise ValueError(f"症例番号 {case_id} に対して，'pre'が含まれている行が見つかりません")

            row_data = self._select_pre_row(pre_rows)

            pixel_spacing_row = row_data['pixel_spacing_row']
            pixel_spacing_col = row_data['pixel_spacing_col']

            if pd.isna(pixel_spacing_row) or pd.isna(pixel_spacing_col):
                raise ValueError(f"症例番号 {case_id} に対して，Pixel Spacingが見つかりません")

            return float(pixel_spacing_row), float(pixel_spacing_col)

        except Exception as e:
            # Every failure is reported as ValueError with the original error chained.
            raise ValueError(f"Pixel Spacingの取得エラー: {e}") from e

if __name__ == '__main__':
    # Manual entry point: load the case list and build the complication map.
    # Both paths are relative to the current working directory.
    root_path = "../../../data/list"
    excel_path = "../../../data/CHIBAMI_case_list_2023rev20240523.xlsx"
    # Alternative data layout:
    # excel_path = "../../Data/CHIBAMI_case_list.xlsx"
    # root_path = "../../Data/list"

    excel = Excel(excel_path)
    data = excel.extract_data()
    # print(data)

    # File-renaming steps; uncomment the one to run:
    # excel.rename(root_path)
    # excel.improve_name(root_path)
    # excel.restore_name(root_path)