"""
dataset
confirmed/ から学習データを読み込むモジュール

画像を flatten した特徴量ベクトルとラベルを返す
"""

from pathlib import Path

import cv2
import numpy as np

from pc.data.collector import (
    CONFIRMED_DIR,
    LABEL_INTERSECTION,
    LABEL_NORMAL,
)


def load_dataset(
    confirmed_dir: Path = CONFIRMED_DIR,
) -> tuple[np.ndarray, np.ndarray]:
    """Load flattened images and labels from a confirmed-data directory.

    Every ``*.png`` file found in the ``LABEL_INTERSECTION`` and
    ``LABEL_NORMAL`` sub-directories of ``confirmed_dir`` is read as a
    grayscale image, scaled by 1/255 and flattened into one feature
    row. Intersection samples come first, then normal samples; within
    each label the files are ordered by sorted path. A missing label
    directory and files OpenCV cannot decode are skipped.

    Args:
        confirmed_dir: Directory holding the confirmed samples, one
            sub-directory per label.

    Returns:
        A tuple ``(X, y)``.
        X: float32 feature matrix of shape (n_samples, n_pixels) with
            values in [0.0, 1.0]. NOTE(review): the original docs
            state n_pixels == 1200 and binary 0.0/1.0 values; this
            function does not enforce either -- confirm upstream.
        y: label array of shape (n_samples,), 1 for intersection
            and 0 for normal.

    Raises:
        FileNotFoundError: If no readable image was found.
        ValueError: If an image's pixel count differs from that of
            the first image loaded.
    """
    features: list[np.ndarray] = []
    labels: list[int] = []
    # Pixel count of the first decoded image; every later image must
    # match it so the rows stack into a rectangular 2-D matrix.
    expected_size = None

    for label_name, label_val in (
        (LABEL_INTERSECTION, 1),
        (LABEL_NORMAL, 0),
    ):
        label_dir = confirmed_dir / label_name
        if not label_dir.is_dir():
            continue
        # Sorted so the sample order is deterministic across runs.
        for img_path in sorted(label_dir.glob("*.png")):
            img = cv2.imread(
                str(img_path), cv2.IMREAD_GRAYSCALE,
            )
            if img is None:
                # imread signals an unreadable file by returning
                # None; such files are skipped (best effort).
                continue
            # Scale 0..255 pixel values to 0.0..1.0 and flatten.
            flat = (img.ravel() / 255.0).astype(
                np.float32,
            )
            if expected_size is None:
                expected_size = flat.size
            elif flat.size != expected_size:
                # Fail early and name the file instead of letting
                # np.array() build a ragged array further down.
                raise ValueError(
                    f"画像サイズが一致しません: {img_path} "
                    f"(期待 {expected_size}, 実際 {flat.size})"
                )
            features.append(flat)
            labels.append(label_val)

    if not features:
        raise FileNotFoundError(
            f"画像が見つかりません: {confirmed_dir}"
        )

    x = np.array(features)
    y = np.array(labels)
    return x, y