"""
Data loading utilities for IVUS annotation tool.
Handles case discovery, image loading, and ground truth labels.
"""
import glob
import os
import re
from pathlib import Path
from typing import Dict, List, Tuple, Union

from .excel import Excel
def get_all_case_ids(data_root: str) -> List[Union[int, float]]:
    """
    Scan data directory and extract case numbers from folder names.

    Only sub-directories named exactly ``CHIBAMI_<number>_pre`` are
    considered, where ``<number>`` is plain decimal digits with an optional
    fractional part (``134`` or ``134.1``). Anything else (extra segments such
    as ``CHIBAMI_134_copy_pre``, or tokens like ``nan``, ``inf``, ``1e3``) is
    skipped, so the returned IDs are always finite and sortable.

    Args:
        data_root: Root directory containing case folders

    Returns:
        List of unique case numbers sorted in ascending order. Whole numbers
        are returned as ``int``, all others as ``float``. An empty list is
        returned when ``data_root`` does not exist or cannot be listed (the
        listing error is printed).

    Example:
        >>> get_all_case_ids("/mnt/d/Research/data/2025_10_30_updated")
        [134, 134.1, 134.2, 135, 135.1, ..., 795]
    """
    if not os.path.exists(data_root):
        return []

    try:
        entries = os.listdir(data_root)
    except OSError as e:
        # e.g. data_root is a regular file or is not readable.
        print(f"Error scanning directory {data_root}: {e}")
        return []

    # ASCII digits only, so float() below can never fail or yield nan/inf
    # from tokens like "nan", "inf" or "1e3".
    case_pattern = re.compile(r"CHIBAMI_([0-9]+(?:\.[0-9]+)?)_pre")

    # A set collapses folders that map to the same number
    # (e.g. "CHIBAMI_134_pre" and "CHIBAMI_134.0_pre").
    case_ids = set()
    for entry in entries:
        match = case_pattern.fullmatch(entry)
        if match is None:
            continue
        if not os.path.isdir(os.path.join(data_root, entry)):
            continue
        case_num = float(match.group(1))
        # Whole numbers are stored as int, decimals are kept as float.
        case_ids.add(int(case_num) if case_num.is_integer() else case_num)

    return sorted(case_ids)
def get_case_images(data_root: str, case_id: Union[int, float]) -> List[str]:
    """
    Get all image paths for a given case, sorted by frame index.

    Images are looked up in ``<data_root>/CHIBAMI_<case_id>_pre/images`` and
    must be named ``frame_*.png``. The frame index is the integer after the
    last underscore of the file name (``frame_134_4440.png`` -> ``4440``).

    Args:
        data_root: Root directory containing case folders
        case_id: Case number (int or float, e.g., 134 or 134.1)

    Returns:
        List of paths to PNG files, sorted by frame index. Paths are built
        from ``data_root``, so they are absolute only if ``data_root`` is.
        Files whose index cannot be parsed are given index 0; files with
        equal index are ordered by file name. An empty list is returned when
        the images directory does not exist.

    Example:
        >>> get_case_images("/mnt/d/Research/data/2025_10_30_updated", 134)
        ['/mnt/d/.../CHIBAMI_134_pre/images/frame_134_4440.png', ...]
        >>> get_case_images("/mnt/d/Research/data/2025_10_30_updated", 134.1)
        ['/mnt/d/.../CHIBAMI_134.1_pre/images/frame_134_4440.png', ...]
    """
    # Construct folder name based on case_id type
    if isinstance(case_id, float) and not case_id.is_integer():
        # Decimal case: CHIBAMI_134.1_pre
        case_name = f"CHIBAMI_{case_id}_pre"
    else:
        # Integer case: CHIBAMI_134_pre
        case_name = f"CHIBAMI_{int(case_id)}_pre"

    images_dir = os.path.join(data_root, case_name, "images")
    if not os.path.isdir(images_dir):
        return []

    # Escape the directory part so glob metacharacters in the path
    # ("[", "]", "*", "?") are matched literally; only "frame_*.png" is a
    # real wildcard.
    pattern = os.path.join(glob.escape(images_dir), "frame_*.png")
    image_files = glob.glob(pattern)

    def sort_key(filepath):
        """Return (frame index, file name) for 'frame_134_4440.png'-style names."""
        filename = os.path.basename(filepath)
        # splitext drops only the final extension, whatever its letter case.
        stem = os.path.splitext(filename)[0]
        try:
            frame_idx = int(stem.rsplit("_", 1)[-1])
        except ValueError:
            frame_idx = 0
        # The file name breaks ties so the order does not depend on the
        # filesystem listing order.
        return frame_idx, filename

    return sorted(image_files, key=sort_key)
def load_ground_truth_labels(excel_path: str) -> Dict[Union[int, float], bool]:
    """
    Read the ground truth labels stored in an Excel file.

    The work is delegated to ``Excel(excel_path).extract_data()``. Loading is
    best-effort: any failure is reported with a printed warning and an empty
    dictionary is returned instead of raising.

    Args:
        excel_path: Path to Excel file containing ground truth labels

    Returns:
        Dictionary mapping case_id to has_complication boolean
        - True: Complication present (No/Slow flow)
        - False: No complication
        Empty when the file is missing or cannot be loaded.

    Example:
        >>> load_ground_truth_labels("/path/to/labels.xlsx")
        {134: True, 134.1: False, 135: False, ...}
    """
    labels = {}
    try:
        workbook = Excel(excel_path)
        labels = workbook.extract_data()
    except FileNotFoundError:
        # Missing file gets its own, more specific warning.
        print(f"Warning: Excel file not found: {excel_path}")
    except Exception as err:
        print(f"Warning: Could not load Excel file: {err}")
    return labels
def validate_case_directory(data_root: str, case_id: Union[int, float]) -> Tuple[bool, str]:
    """
    Check that a case folder exists and contains at least one image.

    The checks run in order and the first failure is reported: the case
    folder exists, it is a directory, its ``images`` sub-folder exists, that
    is a directory, and ``get_case_images`` finds at least one file in it.

    Args:
        data_root: Root directory containing case folders
        case_id: Case number to validate (int or float)

    Returns:
        Tuple of (is_valid, error_message)
        - If valid: (True, "")
        - If invalid: (False, "Error description")

    Example:
        >>> validate_case_directory("/mnt/d/Research/data/2025_10_30_updated", 134)
        (True, "")
        >>> validate_case_directory("/mnt/d/Research/data/2025_10_30_updated", 999)
        (False, "Case directory not found: ...")
    """
    # Decimal IDs keep their fraction (CHIBAMI_134.1_pre); everything else is
    # rendered as an integer (CHIBAMI_134_pre).
    has_fraction = isinstance(case_id, float) and not case_id.is_integer()
    if has_fraction:
        case_name = f"CHIBAMI_{case_id}_pre"
    else:
        case_name = f"CHIBAMI_{int(case_id)}_pre"
    case_dir = os.path.join(data_root, case_name)
    images_dir = os.path.join(case_dir, "images")

    # (probe, path, message reported when the probe fails), in check order.
    path_checks = (
        (os.path.exists, case_dir, f"Case directory not found: {case_name}"),
        (os.path.isdir, case_dir, f"Path is not a directory: {case_dir}"),
        (os.path.exists, images_dir, f"Images directory not found: {images_dir}"),
        (os.path.isdir, images_dir, f"Images path is not a directory: {images_dir}"),
    )
    for probe, target, problem in path_checks:
        if not probe(target):
            return False, problem

    if not get_case_images(data_root, case_id):
        return False, f"No PNG images found in: {images_dir}"
    return True, ""