{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import Models as M\n",
    "import Loaders\n",
    "import defs\n",
    "import numpy as np\n",
    "import lightning.pytorch as pl\n",
    "import lightning.pytorch.callbacks as callbk\n",
    "import optuna\n",
    "from optuna.integration import PyTorchLightningPruningCallback\n",
    "from packaging import version\n",
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "import torchvision\n",
    "from torchvision import transforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OK\n"
     ]
    }
   ],
   "source": [
    "# Fail fast when the installed Lightning is too old for the Optuna\n",
    "# pruning-callback integration used later in this notebook.\n",
    "_minimum_supported = version.parse(\"1.6.0\")\n",
    "if version.parse(pl.__version__) >= _minimum_supported:\n",
    "    print(\"OK\")\n",
    "else:\n",
    "    raise RuntimeError(\"PyTorch Lightning>=1.6.0 is required for this example.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataSet_SB_Ando_Crop_seed_None\\fold_0\n"
     ]
    }
   ],
   "source": [
    "def setup_seed(seed):\n",
    "    # Seed torch (CPU and every visible GPU) plus numpy, and force\n",
    "    # deterministic cuDNN kernels so runs are reproducible.\n",
    "    torch.manual_seed(seed)\n",
    "    torch.cuda.manual_seed_all(seed)\n",
    "    np.random.seed(seed)\n",
    "    torch.backends.cudnn.deterministic = True\n",
    "\n",
    "# Build (or rebuild) the 5-fold split of the cropped dataset on disk.\n",
    "Dataset = Loaders.RARP_DatasetCreator(\n",
    "    \"./DataSet_AndoCrop\",\n",
    "    FoldSeed=None,\n",
    "    createFile=True,\n",
    "    SavePath=\"./DataSet_SB_Ando_Crop\",\n",
    "    Fold=5,\n",
    "    removeBlackBar=False,\n",
    ")\n",
    "\n",
    "Dataset.CreateFolds()\n",
    "\n",
    "Fold = 0  # which of the 5 folds to use below\n",
    "\n",
    "    \n",
    "# Per-channel mean/std hard-coded from a previous pass over the training\n",
    "# split. NOTE(review): values are on a 0-255 scale -- confirm they match the\n",
    "# pixel range produced by defs.load_file_tensor.\n",
    "Dataset.mean, Dataset.std = ([30.38144216, 42.03988769, 97.8896116], [40.63141752, 44.26910074, 50.29294373])\n",
    "\n",
    "setup_seed(2023)\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "InitResize = (256,256)  # resize applied before the training random crop\n",
    "ImgResize = (224, 224)  # final network input size\n",
    "\n",
    "rootFile = (Dataset.CVS_File.parent.parent/f\"fold_{Fold}\")\n",
    "print (rootFile)\n",
    "\n",
    "# Training augmentation pipeline, executed on the GPU as an nn.Sequential.\n",
    "traintransform = torch.nn.Sequential(\n",
    "    transforms.Resize(InitResize),                                   \n",
    "    transforms.RandomCrop(ImgResize),\n",
    "    transforms.RandomAffine(\n",
    "        degrees=(-5, 5), scale=(0.9, 1.1), \n",
    "        fill=5\n",
    "    ),  # NOTE(review): fill=5 pads the exposed border with intensity 5 -- confirm intended\n",
    "    transforms.RandomHorizontalFlip(1.0),  # NOTE(review): p=1.0 flips *every* image deterministically -- confirm 0.5 was not intended\n",
    "    transforms.Normalize(Dataset.mean, Dataset.std),\n",
    ").to(device)\n",
    "\n",
    "# NOTE(review): unlike testtransform below, this Resize does not pass\n",
    "# antialias=True -- confirm the val/test preprocessing should differ.\n",
    "valtransform = torch.nn.Sequential( \n",
    "    transforms.Resize(ImgResize),                                     \n",
    "    transforms.Normalize(Dataset.mean, Dataset.std)\n",
    ").to(device)\n",
    "\n",
    "testtransform =  torch.nn.Sequential(\n",
    "    transforms.Resize(ImgResize,antialias=True),\n",
    "    transforms.Normalize(Dataset.mean, Dataset.std)\n",
    ").to(device)\n",
    "\n",
    "# The fold directories contain pre-extracted .npy tensors, one class per\n",
    "# sub-folder, loaded via the project helper defs.load_file_tensor.\n",
    "trainDataset = torchvision.datasets.DatasetFolder(\n",
    "    str (rootFile/\"train\"),\n",
    "    loader=defs.load_file_tensor,\n",
    "    extensions=\"npy\",\n",
    "    transform=traintransform\n",
    ")\n",
    "\n",
    "valDataset = torchvision.datasets.DatasetFolder(\n",
    "    str (rootFile/\"val\"),\n",
    "    loader=defs.load_file_tensor,\n",
    "    extensions=\"npy\",\n",
    "    transform=valtransform\n",
    ")\n",
    "\n",
    "testDataset = torchvision.datasets.DatasetFolder(\n",
    "    str (rootFile/\"test\"),\n",
    "    loader=defs.load_file_tensor,\n",
    "    extensions=\"npy\",\n",
    "    transform=testtransform\n",
    ")        \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restore the pretrained ResNet50 checkpoint used below for pseudo-labeling.\n",
    "modelSSL = M.RARP_NVB_ResNet50.load_from_checkpoint(\"./log_ResNet50_X10/lightning_logs/version_8/checkpoints/RARP-epoch=5.ckpt\")\n",
    "# NOTE(review): `model` appears unused in this notebook (the Optuna objective\n",
    "# constructs its own model) -- confirm this is leftover.\n",
    "model = M.RARP_NVB_VAN(None, M.TypeLossFunction.BCEWithLogits)\n",
    "\n",
    "# Single-process dataloaders for ad-hoc evaluation; only the training loader\n",
    "# shuffles. The Optuna objective builds its own loaders with a sampled batch\n",
    "# size, shadowing Train_DataLoader/Val_DataLoader inside the function.\n",
    "Test_DataLoader = DataLoader(\n",
    "    testDataset, \n",
    "    batch_size=8, \n",
    "    num_workers=0, \n",
    "    shuffle=False, \n",
    "    pin_memory=True\n",
    ")\n",
    "\n",
    "Train_DataLoader = DataLoader(\n",
    "    trainDataset, \n",
    "    batch_size=8, \n",
    "    num_workers=0, \n",
    "    shuffle=True, \n",
    "    pin_memory=True\n",
    ")\n",
    "\n",
    "Val_DataLoader = DataLoader(\n",
    "    valDataset, \n",
    "    batch_size=8, \n",
    "    num_workers=0, \n",
    "    shuffle=False, \n",
    "    pin_memory=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "EPOCHS = 150  # max epochs per Optuna trial\n",
    "L1_ON = True  # include an L1 regularization weight in the search space\n",
    "L2_ON = False  # exclude the L2 (weight-decay) term from the search space\n",
    "\n",
    "def objective(trail: optuna.trial.Trial) -> float:\n",
    "    # Optuna objective: sample hyperparameters, train a fresh ResNet50 on the\n",
    "    # module-level trainDataset/valDataset, and return the final validation\n",
    "    # accuracy (the study maximizes it; the pruning callback may stop the\n",
    "    # trial early based on the monitored metric).\n",
    "    # NOTE(review): the parameter is conventionally spelled 'trial'.\n",
    "    lr = trail.suggest_float(\"lr\", 1e-4, 1e-2, log=True) \n",
    "    # NOTE(review): a disabled L1 becomes None while a disabled L2 becomes 0 --\n",
    "    # confirm the model config handles None for L1.\n",
    "    l1 = trail.suggest_float(\"L1\", 1e-5, 1e-3, log=True) if L1_ON else None\n",
    "    l2 = trail.suggest_float(\"L2\", 1e-5, 1e-4, log=True) if L2_ON else 0\n",
    "    #n_layers = trail.suggest_int(\"n_layers\", 1, 4)\n",
    "    #dropout = trail.suggest_float(\"dropout\", 0.2, 0.5)\n",
    "    #output_Dims = [\n",
    "    #    trail.suggest_int(\"n_layers_l{}\".format(i), 8, 512, log=True) for i in range(n_layers)\n",
    "    #]\n",
    "    \n",
    "    batchSize = trail.suggest_categorical(\"batch_size\", [8, 16, 32])\n",
    "    #lossFn = trail.suggest_categorical(\"LossFN\", [\n",
    "    #    M.TypeLossFunction.CrossEntropy,\n",
    "    #    M.TypeLossFunction.BCEWithLogits,\n",
    "    #    M.TypeLossFunction.FocalLoss,\n",
    "    #    M.TypeLossFunction.HingeLoss,\n",
    "    #])\n",
    "    Optimizer = trail.suggest_categorical(\"Optimizer\", [\"Adam\", \"Adamax\", \"Nadam\"])\n",
    "    numWorkers = 8\n",
    "    \n",
    "    model = M.RARP_NVB_ResNet50(None, M.TypeLossFunction.BCEWithLogits, config={\"lr\": lr, \"L1\": l1, \"L2\": l2, \"Optimizer\": Optimizer})\n",
    "    \n",
    "    # Fresh dataloaders per trial so the sampled batch size takes effect;\n",
    "    # these shadow the module-level Train_DataLoader/Val_DataLoader.\n",
    "    Train_DataLoader = DataLoader(\n",
    "        trainDataset, \n",
    "        batch_size=batchSize, \n",
    "        num_workers=numWorkers, \n",
    "        shuffle=True, \n",
    "        pin_memory=True,\n",
    "        persistent_workers=True,\n",
    "    )\n",
    "\n",
    "    Val_DataLoader = DataLoader(\n",
    "        valDataset, \n",
    "        batch_size=batchSize, \n",
    "        num_workers=numWorkers, \n",
    "        shuffle=False, \n",
    "        pin_memory=True,\n",
    "        persistent_workers=True,\n",
    "    )\n",
    "\n",
    "    \n",
    "    \n",
    "    # Checkpointing is disabled: only the reported metric matters here.\n",
    "    trainer = pl.Trainer(\n",
    "        logger=True,\n",
    "        enable_checkpointing=False,\n",
    "        max_epochs=EPOCHS,\n",
    "        accelerator=\"auto\",\n",
    "        log_every_n_steps=5,  \n",
    "        devices=1,\n",
    "        callbacks=[PyTorchLightningPruningCallback(trail, monitor=\"val_acc\")],\n",
    "    )\n",
    "    \n",
    "    # Record the sampled configuration so runs are comparable in the logs.\n",
    "    hyperparameters = dict(\n",
    "        lr = lr,\n",
    "        l1 = l1,\n",
    "        l2 = l2,\n",
    "        #lossFN = lossFn,\n",
    "        OptimizerFN = Optimizer,\n",
    "        #n_layers = n_layers, \n",
    "        #dropout = dropout, \n",
    "        #output_dims = output_Dims,\n",
    "        batch_size = batchSize,\n",
    "    )\n",
    "    \n",
    "    trainer.logger.log_hyperparams(hyperparameters)\n",
    "    \n",
    "    trainer.fit(model, train_dataloaders=Train_DataLoader, val_dataloaders=Val_DataLoader)\n",
    "    \n",
    "    # Final validation accuracy of the (possibly pruned) trial.\n",
    "    return trainer.callback_metrics[\"val_acc\"].item()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the frozen pretrained model over the test set and derive pseudo-labels\n",
    "# by thresholding its sigmoid scores.\n",
    "modelSSL.to(device)\n",
    "modelSSL.eval()\n",
    "\n",
    "predictions = []\n",
    "labels = []\n",
    "\n",
    "th = 0.5  # decision threshold on the sigmoid probability\n",
    "\n",
    "with torch.no_grad():\n",
    "    for data, label in iter(Test_DataLoader):\n",
    "        data = data.float().to(device)\n",
    "        label = label.to(device)\n",
    "        pred = modelSSL(data).flatten()\n",
    "        predictions.append(torch.sigmoid(pred))\n",
    "        labels.append(label)\n",
    "        \n",
    "labels = torch.cat(labels)\n",
    "predictions = torch.cat(predictions)\n",
    "\n",
    "# BUG FIX: pseudo-labels must come from the model's *predictions*, not from\n",
    "# the ground-truth labels -- thresholding 0/1 labels at 0.5 is a no-op that\n",
    "# merely copies them.\n",
    "PseudoLabels = (predictions > th) * 1.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([0.3462, 0.4296, 0.3121, 0.3655, 0.4479, 0.4114, 0.4281, 0.5280, 0.8679, 0.7945, 0.5753, 0.7600, 0.9237, 0.6994, 0.4121, 0.5313], device='cuda:0'),\n",
       " tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'),\n",
       " tensor([0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0'))"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect sigmoid scores, ground-truth labels, and derived pseudo-labels.\n",
    "predictions, labels, PseudoLabels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import RANSACRegressor\n",
    "\n",
    "# Per-class RANSAC fit of label on prediction score; keep only each class's\n",
    "# inlier samples.\n",
    "# BUG FIX: `predictions`/`labels` are (possibly CUDA) torch tensors, but\n",
    "# sklearn and np.unique need CPU numpy arrays, and RANSACRegressor.fit\n",
    "# requires a 2-D X of shape (n_samples, 1).\n",
    "predictions_np = predictions.detach().cpu().numpy().reshape(-1, 1)\n",
    "labels_np = labels.detach().cpu().numpy()\n",
    "\n",
    "ransac = RANSACRegressor()\n",
    "X_inliers = []\n",
    "y_inliers = []\n",
    "\n",
    "for class_idx in np.unique(labels_np):\n",
    "    class_mask = labels_np == class_idx\n",
    "    if np.sum(class_mask) > 0:\n",
    "        ransac.fit(predictions_np[class_mask], labels_np[class_mask])\n",
    "        inlier_mask = ransac.inlier_mask_\n",
    "        X_inliers.append(predictions_np[class_mask][inlier_mask])\n",
    "        y_inliers.append(labels_np[class_mask][inlier_mask])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([         -0], dtype=float32)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Coefficient of the *last* per-class RANSAC fit (final loop iteration only).\n",
    "ransac.estimator_.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge the per-class inlier lists into single arrays.\n",
    "# NOTE(review): this cell is not idempotent -- it rebinds X_inliers/y_inliers\n",
    "# from lists to arrays, so re-running it without re-running the RANSAC cell\n",
    "# above will fail.\n",
    "X_inliers = np.vstack(X_inliers)\n",
    "y_inliers = np.concatenate(y_inliers)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[    0.40963],\n",
       "        [    0.48141],\n",
       "        [    0.40876],\n",
       "        [    0.41222],\n",
       "        [    0.58274],\n",
       "        [    0.48825],\n",
       "        [    0.48407],\n",
       "        [    0.62485],\n",
       "        [    0.84806],\n",
       "        [    0.88766],\n",
       "        [    0.59436],\n",
       "        [    0.80752],\n",
       "        [    0.88708],\n",
       "        [    0.79638],\n",
       "        [    0.50623],\n",
       "        [    0.61157]], dtype=float32),\n",
       " array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64))"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the stacked inlier scores and their labels.\n",
    "X_inliers, y_inliers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "# %%capture silences the Lightning/Optuna console spam for the whole study.\n",
    "# NOTE(review): it also swallows the final print() below, so the trial count\n",
    "# is never displayed -- report it from `study` in a later cell instead.\n",
    "pruning = True\n",
    "\n",
    "# SuccessiveHalvingPruner stops unpromising trials early; NopPruner disables\n",
    "# pruning entirely.\n",
    "pruner = optuna.pruners.SuccessiveHalvingPruner() if pruning else optuna.pruners.NopPruner()\n",
    "#Sampler = optuna.samplers.GPSampler(seed=2024)\n",
    "Sampler = optuna.samplers.TPESampler()\n",
    "#Sampler = optuna.samplers.GridSampler()\n",
    "\n",
    "\n",
    "# Maximize validation accuracy over at most 100 trials or 600 seconds.\n",
    "study = optuna.create_study(direction=\"maximize\", pruner=pruner, sampler=Sampler)\n",
    "study.optimize(objective, n_trials=100, timeout=600)\n",
    "\n",
    "print(\"Number of finished trials: {}\".format(len(study.trials)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summarize the winning Optuna trial: its objective value, then every\n",
    "# sampled hyperparameter.\n",
    "trial = study.best_trial\n",
    "print(\"Best trial:\")\n",
    "print(\"  Value: {}\".format(trial.value))\n",
    "print(\"  Params: \")\n",
    "for key in trial.params:\n",
    "    print(\"    {}: {}\".format(key, trial.params[key]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pyRARP",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}