# Process-wide cache: the detector weights are loaded at most once, even when
# init_yolox() is called repeatedly (e.g. once per processed frame).
_YOLOX_INFERENCER = None


def init_yolox():
    """Build (or reuse) the YOLOX ``DetInferencer`` described by ``config``.

    The inferencer is created from ``config.YOLOX_CONFIG_FILE``,
    ``config.YOLOX_CHECKPOINT_FILE`` and ``config.DEVICE``. A successfully
    built inferencer is cached at module level and handed back on every
    later call, so callers that invoke this function in a loop do not reload
    the checkpoint each time.

    Returns:
        The ``DetInferencer`` instance, or ``None`` when initialization
        fails. The failure is printed and is not cached, so the next call
        retries.
    """
    global _YOLOX_INFERENCER
    if _YOLOX_INFERENCER is not None:
        return _YOLOX_INFERENCER

    try:
        # Make "mmdet" the default registry scope before building the model.
        from mmengine.registry import DefaultScope

        DefaultScope.get_instance("mmdet", scope_name="mmdet")

        _YOLOX_INFERENCER = DetInferencer(
            model=config.YOLOX_CONFIG_FILE,
            weights=config.YOLOX_CHECKPOINT_FILE,
            device=config.DEVICE,
        )
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller fall
        # back to the "no detector" path instead of aborting the whole run.
        print(f"Error initializing YOLOX: {str(e)}")
        return None

    return _YOLOX_INFERENCER


def _best_detection_center(predictions, score_thr, target_label=0):
    """Return the box centre of the highest-scoring ``target_label`` hit.

    Args:
        predictions: Mapping with parallel ``"labels"``, ``"scores"`` and
            ``"bboxes"`` sequences; each bbox is read as (x1, y1, x2, y2).
        score_thr: Minimum score a detection needs to be considered.
        target_label: Class index to keep (0 is the only class used here).

    Returns:
        ``(center_x, center_y)`` of the best detection, or ``None`` when no
        detection of the target class reaches ``score_thr``.
    """
    best_center = None
    best_score = -1
    for label, score, bbox in zip(
        predictions["labels"], predictions["scores"], predictions["bboxes"]
    ):
        if label == target_label and score >= score_thr and score > best_score:
            best_center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
            best_score = score
    return best_center


def yolox_detector_inference(frame, yolox_inferencer, score_thr=0.3):
    """Detect the stethoscope in ``frame`` and return an overlay plus centre.

    Args:
        frame: Input image; it is converted with ``cv2.COLOR_BGR2RGB`` before
            inference, i.e. it is treated as a BGR image.
        yolox_inferencer: Object returned by ``init_yolox()``. When it is
            ``None`` the function degrades gracefully (see Returns).
        score_thr: Minimum score for a detection to be used. It is also
            forwarded as the drawing threshold so the overlay shows the same
            detections that can influence the returned coordinates.

    Returns:
        A tuple ``(overlay_img, x, y)``. ``overlay_img`` is the visualization
        produced by the inferencer (converted back to BGR when it has three
        channels); ``x`` and ``y`` are the centre of the best class-0 box.
        When nothing is detected ``x`` and ``y`` are ``0``; when
        ``yolox_inferencer`` is ``None`` the result is ``(frame, 0, 0)``.
    """
    # Without a detector, hand the frame back untouched with the "no
    # detection" coordinates.
    if yolox_inferencer is None:
        return frame, 0, 0

    # NOTE(review): the frame is handed to the inferencer as RGB, mirroring
    # the original code. Confirm this matches the channel order the
    # checkpoint was trained with.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = yolox_inferencer(
        inputs=frame_rgb, return_vis=True, pred_score_thr=score_thr
    )
    center = _best_detection_center(result["predictions"][0], score_thr)

    stethoscope_overlay_img = result["visualization"][0]
    if (
        len(stethoscope_overlay_img.shape) == 3
        and stethoscope_overlay_img.shape[2] == 3
    ):
        # Back to OpenCV channel order for the downstream cv2.imwrite calls.
        stethoscope_overlay_img = cv2.cvtColor(
            stethoscope_overlay_img, cv2.COLOR_RGB2BGR
        )

    # No detection: callers expect the (0, 0) sentinel.
    if center is None:
        return stethoscope_overlay_img, 0, 0

    stethoscope_x, stethoscope_y = center
    return stethoscope_overlay_img, stethoscope_x, stethoscope_y
# YOLOX-L (deepen_factor=1.0, widen_factor=1.0) configuration for the
# single-class 'stethoscope' detector, written as an MMDetection / MMEngine
# Python config. This variant starts from 'yolox_l_Black_only.pth' and reads
# the 'White-lined-designs-Test-COCO' dataset.
#
# Values declared once (img_scale, base_lr, max_epochs, num_last_epochs,
# interval, dataset_type) are referenced at every use site so that editing
# the declaration really changes the run. No top-level names were added.

# Basic Settings
default_scope = 'mmdet'
log_level = 'INFO'
load_from = 'checkpoints/yolox_l_Black_only.pth'
resume = False

# Environment Configuration
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
)

# Dataset and Data Root Configuration
data_root = 'data/White-lined-designs-Test-COCO/'
dataset_type = 'CocoDataset'
class_name = ['stethoscope']
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

# Image Size Configuration
img_scale = (640, 480)
img_scales = [img_scale]

# Model Configuration
model = dict(
    type='YOLOX',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                # Identical bounds: the batch size is effectively fixed.
                random_size_range=(480, 480),
                size_divisor=32,
                interval=10,
            ),
        ],
    ),
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=1.0,
        widen_factor=1.0,
        out_indices=(2, 3, 4),
        use_depthwise=False,
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3,
        use_depthwise=False,
        upsample_cfg=dict(scale_factor=2, mode='nearest'),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=1,
        in_channels=256,
        feat_channels=256,
        strides=(8, 16, 32),
        stacked_convs=2,
        use_depthwise=False,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_bbox=dict(
            type='IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0,
        ),
        loss_obj=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0),
    ),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)),
)

# Training Configuration
max_epochs = 300
num_last_epochs = 15
base_lr = 0.01
interval = 10

# Optimization Configuration
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD',
        lr=base_lr,
        momentum=0.9,
        weight_decay=0.0005,
        nesterov=True,
    ),
    paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0),
)

# Learning Rate Schedule: 5 warmup epochs, cosine decay until the last
# `num_last_epochs` epochs, then a constant rate to the end.
param_scheduler = [
    dict(
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True,
    ),
    dict(
        type='CosineAnnealingLR',
        T_max=max_epochs - num_last_epochs,
        eta_min=base_lr * 0.05,
        begin=5,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True,
    ),
    dict(
        type='ConstantLR',
        factor=1,
        begin=max_epochs - num_last_epochs,
        end=max_epochs,
        by_epoch=True,
    ),
]

# Custom Hooks Configuration
custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48,
    ),
    dict(type='SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49,
    ),
]

# Default Hooks Configuration
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=interval, max_keep_ckpts=3),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'),
)

# Data Loading Configuration
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='MultiImageMixDataset',
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='train/annotations/instances_default.json',
            data_prefix=dict(img='train/images/'),
            metainfo=metainfo,
            filter_cfg=dict(filter_empty_gt=False, min_size=32),
            pipeline=[
                dict(type='LoadImageFromFile', backend_args=None),
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            backend_args=None,
        ),
        pipeline=[
            dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
            dict(
                type='RandomAffine',
                scaling_ratio_range=(0.1, 2),
                # Half of img_scale on each axis, negated: (-320, -240).
                border=(-img_scale[0] // 2, -img_scale[1] // 2),
            ),
            dict(
                type='MixUp',
                img_scale=img_scale,
                ratio_range=(0.8, 1.6),
                pad_val=114.0,
            ),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(
                type='FilterAnnotations',
                min_gt_bbox_wh=(1, 1),
                keep_empty=False,
            ),
            dict(type='PackDetInputs'),
        ],
    ),
)

# Validation Configuration
val_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='valid/annotations/instances_default.json',
        data_prefix=dict(img='valid/images/'),
        test_mode=True,
        metainfo=metainfo,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
            ),
        ],
    ),
)

# Test Configuration (testing reuses the validation loader)
test_dataloader = val_dataloader

# Metrics and Evaluation Configuration
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'valid/annotations/instances_default.json',
    metric='bbox',
    backend_args=None,
)
test_evaluator = val_evaluator

# Training and Testing Loops Configuration
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=interval,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Test Time Augmentation Configuration
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100),
)

tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale=img_scale, keep_ratio=True),
                dict(type='Resize', scale=(320, 240), keep_ratio=True),
                dict(type='Resize', scale=(960, 720), keep_ratio=True),
            ],
            [
                dict(type='RandomFlip', prob=1.0),
                dict(type='RandomFlip', prob=0.0),
            ],
            [
                dict(
                    type='Pad',
                    pad_to_square=True,
                    pad_val=dict(img=(114.0, 114.0, 114.0)),
                ),
            ],
            [
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            [
                dict(
                    type='PackDetInputs',
                    meta_keys=(
                        'img_id',
                        'img_path',
                        'ori_shape',
                        'img_shape',
                        'scale_factor',
                        'flip',
                        'flip_direction',
                    ),
                ),
            ],
        ],
    ),
]

# Visualization Configuration
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)

# Auto Scale Learning Rate Configuration
auto_scale_lr = dict(enable=False, base_batch_size=64)
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
# YOLOX-L (deepen_factor=1.0, widen_factor=1.0) configuration for the
# single-class 'stethoscope' detector, written as an MMDetection / MMEngine
# Python config. This variant starts from the raw COCO YOLOX-L checkpoint
# and reads the 'White-lined-designs-COCO' dataset.
#
# Values declared once (img_scale, base_lr, max_epochs, num_last_epochs,
# interval, dataset_type) are referenced at every use site so that editing
# the declaration really changes the run. No top-level names were added.

# Basic Settings
default_scope = 'mmdet'
log_level = 'INFO'
load_from = 'checkpoints/raw-weight/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
resume = False

# Environment Configuration
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
)

# Dataset and Data Root Configuration
data_root = 'data/White-lined-designs-COCO/'
dataset_type = 'CocoDataset'
class_name = ['stethoscope']
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

# Image Size Configuration
img_scale = (640, 480)
img_scales = [img_scale]

# Model Configuration
model = dict(
    type='YOLOX',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                # Identical bounds: the batch size is effectively fixed.
                random_size_range=(480, 480),
                size_divisor=32,
                interval=10,
            ),
        ],
    ),
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=1.0,
        widen_factor=1.0,
        out_indices=(2, 3, 4),
        use_depthwise=False,
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3,
        use_depthwise=False,
        upsample_cfg=dict(scale_factor=2, mode='nearest'),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=1,
        in_channels=256,
        feat_channels=256,
        strides=(8, 16, 32),
        stacked_convs=2,
        use_depthwise=False,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_bbox=dict(
            type='IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0,
        ),
        loss_obj=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0),
    ),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)),
)

# Training Configuration
max_epochs = 300
num_last_epochs = 15
base_lr = 0.01
interval = 10

# Optimization Configuration
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD',
        lr=base_lr,
        momentum=0.9,
        weight_decay=0.0005,
        nesterov=True,
    ),
    paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0),
)

# Learning Rate Schedule: 5 warmup epochs, cosine decay until the last
# `num_last_epochs` epochs, then a constant rate to the end.
param_scheduler = [
    dict(
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True,
    ),
    dict(
        type='CosineAnnealingLR',
        T_max=max_epochs - num_last_epochs,
        eta_min=base_lr * 0.05,
        begin=5,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True,
    ),
    dict(
        type='ConstantLR',
        factor=1,
        begin=max_epochs - num_last_epochs,
        end=max_epochs,
        by_epoch=True,
    ),
]

# Custom Hooks Configuration
custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48,
    ),
    dict(type='SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49,
    ),
]

# Default Hooks Configuration
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=interval, max_keep_ckpts=3),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'),
)

# Data Loading Configuration
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='MultiImageMixDataset',
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='train/annotations/instances_default.json',
            data_prefix=dict(img='train/images/'),
            metainfo=metainfo,
            filter_cfg=dict(filter_empty_gt=False, min_size=32),
            pipeline=[
                dict(type='LoadImageFromFile', backend_args=None),
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            backend_args=None,
        ),
        pipeline=[
            dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
            dict(
                type='RandomAffine',
                scaling_ratio_range=(0.1, 2),
                # Half of img_scale on each axis, negated: (-320, -240).
                border=(-img_scale[0] // 2, -img_scale[1] // 2),
            ),
            dict(
                type='MixUp',
                img_scale=img_scale,
                ratio_range=(0.8, 1.6),
                pad_val=114.0,
            ),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(
                type='FilterAnnotations',
                min_gt_bbox_wh=(1, 1),
                keep_empty=False,
            ),
            dict(type='PackDetInputs'),
        ],
    ),
)

# Validation Configuration
val_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='valid/annotations/instances_default.json',
        data_prefix=dict(img='valid/images/'),
        test_mode=True,
        metainfo=metainfo,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
            ),
        ],
    ),
)

# Test Configuration (testing reuses the validation loader)
test_dataloader = val_dataloader

# Metrics and Evaluation Configuration
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'valid/annotations/instances_default.json',
    metric='bbox',
    backend_args=None,
)
test_evaluator = val_evaluator

# Training and Testing Loops Configuration
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=interval,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Test Time Augmentation Configuration
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100),
)

tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale=img_scale, keep_ratio=True),
                dict(type='Resize', scale=(320, 240), keep_ratio=True),
                dict(type='Resize', scale=(960, 720), keep_ratio=True),
            ],
            [
                dict(type='RandomFlip', prob=1.0),
                dict(type='RandomFlip', prob=0.0),
            ],
            [
                dict(
                    type='Pad',
                    pad_to_square=True,
                    pad_val=dict(img=(114.0, 114.0, 114.0)),
                ),
            ],
            [
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            [
                dict(
                    type='PackDetInputs',
                    meta_keys=(
                        'img_id',
                        'img_path',
                        'ori_shape',
                        'img_shape',
                        'scale_factor',
                        'flip',
                        'flip_direction',
                    ),
                ),
            ],
        ],
    ),
]

# Visualization Configuration
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)

# Auto Scale Learning Rate Configuration
auto_scale_lr = dict(enable=False, base_batch_size=64)
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
# YOLOX-S (deepen_factor=0.33, widen_factor=0.50) configuration for the
# single-class 'stethoscope' detector, written as an MMDetection / MMEngine
# Python config. This variant starts from 'yolox_s_Black_only.pth' and reads
# the 'White-lined-designs-Test-COCO' dataset.
#
# Values declared once (img_scale, base_lr, max_epochs, num_last_epochs,
# interval, dataset_type) are referenced at every use site so that editing
# the declaration really changes the run. No top-level names were added.

# Basic Settings
default_scope = 'mmdet'
log_level = 'INFO'
load_from = 'checkpoints/yolox_s_Black_only.pth'  # YOLOX-S checkpoint
resume = False

# Environment Configuration
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
)

# Dataset and Data Root Configuration
data_root = 'data/White-lined-designs-Test-COCO/'
dataset_type = 'CocoDataset'
class_name = ['stethoscope']
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

# Image Size Configuration
img_scale = (640, 480)
img_scales = [img_scale]

# Model Configuration
model = dict(
    type='YOLOX',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                # Identical bounds: the batch size is effectively fixed.
                random_size_range=(480, 480),
                size_divisor=32,
                interval=10,
            ),
        ],
    ),
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=0.33,  # YOLOX-S depth multiplier
        widen_factor=0.50,  # YOLOX-S width multiplier
        out_indices=(2, 3, 4),
        use_depthwise=False,
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[128, 256, 512],  # follows widen_factor
        out_channels=128,  # follows widen_factor
        num_csp_blocks=1,  # YOLOX-S uses a single CSP block here
        use_depthwise=False,
        upsample_cfg=dict(scale_factor=2, mode='nearest'),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=1,
        in_channels=128,  # must match neck out_channels
        feat_channels=128,  # must match neck out_channels
        strides=(8, 16, 32),
        stacked_convs=2,
        use_depthwise=False,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_bbox=dict(
            type='IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0,
        ),
        loss_obj=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0),
    ),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)),
)

# Training Configuration
max_epochs = 300
num_last_epochs = 15
base_lr = 0.01 * 64 / 128  # 0.005: the 0.01 baseline scaled by 64/128
interval = 10

# Optimization Configuration
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD',
        lr=base_lr,
        momentum=0.9,
        weight_decay=0.0005,
        nesterov=True,
    ),
    paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0),
)

# Learning Rate Schedule: 5 warmup epochs, cosine decay until the last
# `num_last_epochs` epochs, then a constant rate to the end.
param_scheduler = [
    dict(
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True,
    ),
    dict(
        type='CosineAnnealingLR',
        T_max=max_epochs - num_last_epochs,
        eta_min=base_lr * 0.05,  # 0.00025, scaled together with base_lr
        begin=5,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True,
    ),
    dict(
        type='ConstantLR',
        factor=1,
        begin=max_epochs - num_last_epochs,
        end=max_epochs,
        by_epoch=True,
    ),
]

# Custom Hooks Configuration
custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48,
    ),
    dict(type='SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49,
    ),
]

# Default Hooks Configuration
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=interval, max_keep_ckpts=3),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'),
)

# Data Loading Configuration
train_dataloader = dict(
    batch_size=8,  # same per-loader batch size as the YOLOX-L configs
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='MultiImageMixDataset',
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='train/annotations/instances_default.json',
            data_prefix=dict(img='train/images/'),
            metainfo=metainfo,
            filter_cfg=dict(filter_empty_gt=False, min_size=32),
            pipeline=[
                dict(type='LoadImageFromFile', backend_args=None),
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            backend_args=None,
        ),
        pipeline=[
            dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
            dict(
                type='RandomAffine',
                scaling_ratio_range=(0.1, 2),
                # Half of img_scale on each axis, negated: (-320, -240).
                border=(-img_scale[0] // 2, -img_scale[1] // 2),
            ),
            dict(
                type='MixUp',
                img_scale=img_scale,
                ratio_range=(0.8, 1.6),
                pad_val=114.0,
            ),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(
                type='FilterAnnotations',
                min_gt_bbox_wh=(1, 1),
                keep_empty=False,
            ),
            dict(type='PackDetInputs'),
        ],
    ),
)

# Validation Configuration
val_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='valid/annotations/instances_default.json',
        data_prefix=dict(img='valid/images/'),
        test_mode=True,
        metainfo=metainfo,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
            ),
        ],
    ),
)

# Test Configuration (testing reuses the validation loader)
test_dataloader = val_dataloader

# Metrics and Evaluation Configuration
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'valid/annotations/instances_default.json',
    metric='bbox',
    backend_args=None,
)
test_evaluator = val_evaluator

# Training and Testing Loops Configuration
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=interval,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Test Time Augmentation Configuration
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100),
)

tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale=img_scale, keep_ratio=True),
                dict(type='Resize', scale=(320, 240), keep_ratio=True),
                dict(type='Resize', scale=(960, 720), keep_ratio=True),
            ],
            [
                dict(type='RandomFlip', prob=1.0),
                dict(type='RandomFlip', prob=0.0),
            ],
            [
                dict(
                    type='Pad',
                    pad_to_square=True,
                    pad_val=dict(img=(114.0, 114.0, 114.0)),
                ),
            ],
            [
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            [
                dict(
                    type='PackDetInputs',
                    meta_keys=(
                        'img_id',
                        'img_path',
                        'ori_shape',
                        'img_shape',
                        'scale_factor',
                        'flip',
                        'flip_direction',
                    ),
                ),
            ],
        ],
    ),
]

# Visualization Configuration
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)

# Auto Scale Learning Rate Configuration
auto_scale_lr = dict(enable=False, base_batch_size=64)
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
# YOLOX-S (deepen_factor=0.33, widen_factor=0.50) configuration for the
# single-class 'stethoscope' detector, written as an MMDetection / MMEngine
# Python config. This variant starts from the raw COCO YOLOX-S checkpoint
# and reads the 'White-lined-designs-COCO' dataset.
#
# Values declared once (img_scale, base_lr, max_epochs, num_last_epochs,
# interval, dataset_type) are referenced at every use site so that editing
# the declaration really changes the run. No top-level names were added.

# Basic Settings
default_scope = 'mmdet'
log_level = 'INFO'
load_from = 'checkpoints/raw-weight/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth'  # YOLOX-S checkpoint
resume = False

# Environment Configuration
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
)

# Dataset and Data Root Configuration
data_root = 'data/White-lined-designs-COCO/'
dataset_type = 'CocoDataset'
class_name = ['stethoscope']
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

# Image Size Configuration
img_scale = (640, 480)
img_scales = [img_scale]

# Model Configuration
model = dict(
    type='YOLOX',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                # Identical bounds: the batch size is effectively fixed.
                random_size_range=(480, 480),
                size_divisor=32,
                interval=10,
            ),
        ],
    ),
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=0.33,  # YOLOX-S depth multiplier
        widen_factor=0.50,  # YOLOX-S width multiplier
        out_indices=(2, 3, 4),
        use_depthwise=False,
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[128, 256, 512],  # follows widen_factor
        out_channels=128,  # follows widen_factor
        num_csp_blocks=1,  # YOLOX-S uses a single CSP block here
        use_depthwise=False,
        upsample_cfg=dict(scale_factor=2, mode='nearest'),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=1,
        in_channels=128,  # must match neck out_channels
        feat_channels=128,  # must match neck out_channels
        strides=(8, 16, 32),
        stacked_convs=2,
        use_depthwise=False,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_bbox=dict(
            type='IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0,
        ),
        loss_obj=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0,
        ),
        loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0),
    ),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)),
)

# Training Configuration
max_epochs = 300
num_last_epochs = 15
base_lr = 0.01 * 64 / 128  # 0.005: the 0.01 baseline scaled by 64/128
interval = 10

# Optimization Configuration
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD',
        lr=base_lr,
        momentum=0.9,
        weight_decay=0.0005,
        nesterov=True,
    ),
    paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0),
)

# Learning Rate Schedule: 5 warmup epochs, cosine decay until the last
# `num_last_epochs` epochs, then a constant rate to the end.
param_scheduler = [
    dict(
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True,
    ),
    dict(
        type='CosineAnnealingLR',
        T_max=max_epochs - num_last_epochs,
        eta_min=base_lr * 0.05,  # 0.00025, scaled together with base_lr
        begin=5,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True,
    ),
    dict(
        type='ConstantLR',
        factor=1,
        begin=max_epochs - num_last_epochs,
        end=max_epochs,
        by_epoch=True,
    ),
]

# Custom Hooks Configuration
custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48,
    ),
    dict(type='SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49,
    ),
]

# Default Hooks Configuration
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=interval, max_keep_ckpts=3),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'),
)

# Data Loading Configuration
train_dataloader = dict(
    batch_size=8,  # same per-loader batch size as the YOLOX-L configs
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='MultiImageMixDataset',
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='train/annotations/instances_default.json',
            data_prefix=dict(img='train/images/'),
            metainfo=metainfo,
            filter_cfg=dict(filter_empty_gt=False, min_size=32),
            pipeline=[
                dict(type='LoadImageFromFile', backend_args=None),
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            backend_args=None,
        ),
        pipeline=[
            dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
            dict(
                type='RandomAffine',
                scaling_ratio_range=(0.1, 2),
                # Half of img_scale on each axis, negated: (-320, -240).
                border=(-img_scale[0] // 2, -img_scale[1] // 2),
            ),
            dict(
                type='MixUp',
                img_scale=img_scale,
                ratio_range=(0.8, 1.6),
                pad_val=114.0,
            ),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(
                type='FilterAnnotations',
                min_gt_bbox_wh=(1, 1),
                keep_empty=False,
            ),
            dict(type='PackDetInputs'),
        ],
    ),
)

# Validation Configuration
val_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='valid/annotations/instances_default.json',
        data_prefix=dict(img='valid/images/'),
        test_mode=True,
        metainfo=metainfo,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=img_scale, keep_ratio=True),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0)),
            ),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
            ),
        ],
    ),
)

# Test Configuration (testing reuses the validation loader)
test_dataloader = val_dataloader

# Metrics and Evaluation Configuration
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'valid/annotations/instances_default.json',
    metric='bbox',
    backend_args=None,
)
test_evaluator = val_evaluator

# Training and Testing Loops Configuration
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=interval,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Test Time Augmentation Configuration
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100),
)

tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale=img_scale, keep_ratio=True),
                dict(type='Resize', scale=(320, 240), keep_ratio=True),
                dict(type='Resize', scale=(960, 720), keep_ratio=True),
            ],
            [
                dict(type='RandomFlip', prob=1.0),
                dict(type='RandomFlip', prob=0.0),
            ],
            [
                dict(
                    type='Pad',
                    pad_to_square=True,
                    pad_val=dict(img=(114.0, 114.0, 114.0)),
                ),
            ],
            [
                dict(type='LoadAnnotations', with_bbox=True),
            ],
            [
                dict(
                    type='PackDetInputs',
                    meta_keys=(
                        'img_id',
                        'img_path',
                        'ori_shape',
                        'img_shape',
                        'scale_factor',
                        'flip',
                        'flip_direction',
                    ),
                ),
            ],
        ],
    ),
]

# Visualization Configuration
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)

# Auto Scale Learning Rate Configuration
auto_scale_lr = dict(enable=False, base_batch_size=64)
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict(type='LoadAnnotations', with_bbox=True), + ], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction') + ), + ] + ] + ) +] + +# Visualization Configuration +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer' +) + +# Auto Scale Learning Rate Configuration +auto_scale_lr = dict(enable=False, base_batch_size=64) +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) \ No newline at end of file diff --git a/modules/yolox/config/yolox_tiny_8x8_300e_coco-EARS-fine-tuning-test.py b/modules/yolox/config/yolox_tiny_8x8_300e_coco-EARS-fine-tuning-test.py new file mode 100644 index 0000000..c7e71cc --- /dev/null +++ b/modules/yolox/config/yolox_tiny_8x8_300e_coco-EARS-fine-tuning-test.py @@ -0,0 +1,326 @@ +# Basic Settings +default_scope = 'mmdet' +log_level = 'INFO' +load_from = 'checkpoints/yolox_tiny_Black_only.pth' +resume = False + +# Environment Configuration +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0) +) + +# Dataset and Data Root Configuration +data_root = 'data/White-lined-designs-Test-COCO/' +dataset_type = 'CocoDataset' +class_name = ['stethoscope'] +metainfo = dict( + classes=class_name, + palette=[(20, 220, 60)] +) + +# Image Size Configuration +img_scale = (640, 480) # Kept original image size +img_scales = [(640, 480)] + +# Model Configuration +model = dict( + type='YOLOX', + data_preprocessor=dict( + type='DetDataPreprocessor', + pad_size_divisor=32, + batch_augments=[ + dict( + type='BatchSyncRandomResize', + random_size_range=(480, 480), # Kept original size + size_divisor=32, + interval=10 + ), + ], + ), + backbone=dict( + type='CSPDarknet', + deepen_factor=0.33, # Same as YOLOX-S + 
widen_factor=0.375, # Changed to 0.375 for YOLOX-Tiny + out_indices=(2, 3, 4), + use_depthwise=True, # Changed to True for more efficiency + spp_kernal_sizes=(5, 9, 13), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + neck=dict( + type='YOLOXPAFPN', + in_channels=[96, 192, 384], # Changed according to widen_factor + out_channels=96, # Changed according to widen_factor + num_csp_blocks=1, # Same as YOLOX-S + use_depthwise=True, # Changed to True for more efficiency + upsample_cfg=dict(scale_factor=2, mode='nearest'), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + bbox_head=dict( + type='YOLOXHead', + num_classes=1, + in_channels=96, # Changed according to neck out_channels + feat_channels=96, # Changed according to neck out_channels + strides=(8, 16, 32), + stacked_convs=2, + use_depthwise=True, # Changed to True for more efficiency + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_bbox=dict( + type='IoULoss', + mode='square', + eps=1e-16, + reduction='sum', + loss_weight=5.0 + ), + loss_obj=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0), + ), + train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)), + test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)) +) + +# Training Configuration +max_epochs = 300 +num_last_epochs = 15 +base_lr = 0.01 * 64 / 128 # Adjusted for batch size +interval = 10 + +# Optimization Configuration +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', + lr=0.01 * 64 / 128, # Adjusted for batch size + momentum=0.9, + weight_decay=0.0005, + nesterov=True + ), + paramwise_cfg=dict( + norm_decay_mult=0.0, + bias_decay_mult=0.0 + ) +) + +# Learning 
Rate Schedule +param_scheduler = [ + dict( + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True + ), + dict( + type='CosineAnnealingLR', + T_max=285, + eta_min=0.0005 * 64 / 128, # Adjusted for batch size + begin=5, + end=285, + by_epoch=True, + convert_to_iter_based=True + ), + dict( + type='ConstantLR', + factor=1, + begin=285, + end=300, + by_epoch=True + ), +] + +# Custom Hooks Configuration +custom_hooks = [ + dict( + type='YOLOXModeSwitchHook', + num_last_epochs=15, + priority=48 + ), + dict( + type='SyncNormHook', + priority=48 + ), + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0001, + update_buffers=True, + priority=49 + ), +] + +# Default Hooks Configuration +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook') +) + +# Data Loading Configuration +train_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='MultiImageMixDataset', + dataset=dict( + type='CocoDataset', + data_root=data_root, + ann_file='train/annotations/instances_default.json', + data_prefix=dict(img='train/images/'), + metainfo=metainfo, + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='LoadAnnotations', with_bbox=True), + ], + backend_args=None + ), + pipeline=[ + dict(type='Mosaic', img_scale=(640, 480), pad_val=114.0), # Kept original image size + dict( + type='RandomAffine', + scaling_ratio_range=(0.1, 2), + border=(-320, -240) # Kept original border size + ), + dict( + type='MixUp', + img_scale=(640, 480), # Kept original image size + ratio_range=(0.8, 1.6), + 
pad_val=114.0 + ), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', scale=(640, 480), keep_ratio=True), # Kept original image size + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict( + type='FilterAnnotations', + min_gt_bbox_wh=(1, 1), + keep_empty=False + ), + dict(type='PackDetInputs') + ] + ) +) + +# Validation Configuration +val_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='CocoDataset', + data_root=data_root, + ann_file='valid/annotations/instances_default.json', + data_prefix=dict(img='valid/images/'), + test_mode=True, + metainfo=metainfo, + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='Resize', scale=(640, 480), keep_ratio=True), # Kept original image size + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor') + ) + ] + ) +) + +# Test Configuration +test_dataloader = val_dataloader + +# Metrics and Evaluation Configuration +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'valid/annotations/instances_default.json', + metric='bbox', + backend_args=None +) +test_evaluator = val_evaluator + +# Training and Testing Loops Configuration +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=10 +) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# Test Time Augmentation Configuration +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100) +) + +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale=(640, 480), 
keep_ratio=True), # Kept original image size + dict(type='Resize', scale=(320, 240), keep_ratio=True), # Kept original scale ratio + dict(type='Resize', scale=(960, 720), keep_ratio=True), # Kept original scale ratio + ], + [ + dict(type='RandomFlip', prob=1.0), + dict(type='RandomFlip', prob=0.0), + ], + [ + dict(type='Pad', pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict(type='LoadAnnotations', with_bbox=True), + ], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction') + ), + ] + ] + ) +] + +# Visualization Configuration +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer' +) + +# Auto Scale Learning Rate Configuration +auto_scale_lr = dict(enable=False, base_batch_size=64) +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) \ No newline at end of file diff --git a/modules/yolox/config/yolox_tiny_8x8_300e_coco-EARS-white.py b/modules/yolox/config/yolox_tiny_8x8_300e_coco-EARS-white.py new file mode 100644 index 0000000..ce012b5 --- /dev/null +++ b/modules/yolox/config/yolox_tiny_8x8_300e_coco-EARS-white.py @@ -0,0 +1,326 @@ +# Basic Settings +default_scope = 'mmdet' +log_level = 'INFO' +load_from = 'checkpoints/raw-weight/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth' +resume = False + +# Environment Configuration +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0) +) + +# Dataset and Data Root Configuration +data_root = 'data/White-lined-designs-COCO/' +dataset_type = 'CocoDataset' +class_name = ['stethoscope'] +metainfo = dict( + classes=class_name, + palette=[(20, 220, 60)] +) + +# Image Size Configuration +img_scale = (640, 480) # Kept original image size +img_scales = [(640, 480)] + +# Model Configuration +model = dict( + type='YOLOX', + 
data_preprocessor=dict( + type='DetDataPreprocessor', + pad_size_divisor=32, + batch_augments=[ + dict( + type='BatchSyncRandomResize', + random_size_range=(480, 480), # Kept original size + size_divisor=32, + interval=10 + ), + ], + ), + backbone=dict( + type='CSPDarknet', + deepen_factor=0.33, # Same as YOLOX-S + widen_factor=0.375, # Changed to 0.375 for YOLOX-Tiny + out_indices=(2, 3, 4), + use_depthwise=True, # Changed to True for more efficiency + spp_kernal_sizes=(5, 9, 13), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + neck=dict( + type='YOLOXPAFPN', + in_channels=[96, 192, 384], # Changed according to widen_factor + out_channels=96, # Changed according to widen_factor + num_csp_blocks=1, # Same as YOLOX-S + use_depthwise=True, # Changed to True for more efficiency + upsample_cfg=dict(scale_factor=2, mode='nearest'), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + bbox_head=dict( + type='YOLOXHead', + num_classes=1, + in_channels=96, # Changed according to neck out_channels + feat_channels=96, # Changed according to neck out_channels + strides=(8, 16, 32), + stacked_convs=2, + use_depthwise=True, # Changed to True for more efficiency + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_bbox=dict( + type='IoULoss', + mode='square', + eps=1e-16, + reduction='sum', + loss_weight=5.0 + ), + loss_obj=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0), + ), + train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)), + test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)) +) + +# Training Configuration +max_epochs = 300 +num_last_epochs = 15 +base_lr = 0.01 * 64 / 128 # Adjusted for batch size 
+interval = 10 + +# Optimization Configuration +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', + lr=0.01 * 64 / 128, # Adjusted for batch size + momentum=0.9, + weight_decay=0.0005, + nesterov=True + ), + paramwise_cfg=dict( + norm_decay_mult=0.0, + bias_decay_mult=0.0 + ) +) + +# Learning Rate Schedule +param_scheduler = [ + dict( + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True + ), + dict( + type='CosineAnnealingLR', + T_max=285, + eta_min=0.0005 * 64 / 128, # Adjusted for batch size + begin=5, + end=285, + by_epoch=True, + convert_to_iter_based=True + ), + dict( + type='ConstantLR', + factor=1, + begin=285, + end=300, + by_epoch=True + ), +] + +# Custom Hooks Configuration +custom_hooks = [ + dict( + type='YOLOXModeSwitchHook', + num_last_epochs=15, + priority=48 + ), + dict( + type='SyncNormHook', + priority=48 + ), + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0001, + update_buffers=True, + priority=49 + ), +] + +# Default Hooks Configuration +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook') +) + +# Data Loading Configuration +train_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='MultiImageMixDataset', + dataset=dict( + type='CocoDataset', + data_root=data_root, + ann_file='train/annotations/instances_default.json', + data_prefix=dict(img='train/images/'), + metainfo=metainfo, + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='LoadAnnotations', with_bbox=True), + ], + backend_args=None + ), + 
pipeline=[ + dict(type='Mosaic', img_scale=(640, 480), pad_val=114.0), # Kept original image size + dict( + type='RandomAffine', + scaling_ratio_range=(0.1, 2), + border=(-320, -240) # Kept original border size + ), + dict( + type='MixUp', + img_scale=(640, 480), # Kept original image size + ratio_range=(0.8, 1.6), + pad_val=114.0 + ), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', scale=(640, 480), keep_ratio=True), # Kept original image size + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict( + type='FilterAnnotations', + min_gt_bbox_wh=(1, 1), + keep_empty=False + ), + dict(type='PackDetInputs') + ] + ) +) + +# Validation Configuration +val_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='CocoDataset', + data_root=data_root, + ann_file='valid/annotations/instances_default.json', + data_prefix=dict(img='valid/images/'), + test_mode=True, + metainfo=metainfo, + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='Resize', scale=(640, 480), keep_ratio=True), # Kept original image size + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor') + ) + ] + ) +) + +# Test Configuration +test_dataloader = val_dataloader + +# Metrics and Evaluation Configuration +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'valid/annotations/instances_default.json', + metric='bbox', + backend_args=None +) +test_evaluator = val_evaluator + +# Training and Testing Loops Configuration +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=10 +) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + 
+# Test Time Augmentation Configuration +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100) +) + +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale=(640, 480), keep_ratio=True), # Kept original image size + dict(type='Resize', scale=(320, 240), keep_ratio=True), # Kept original scale ratio + dict(type='Resize', scale=(960, 720), keep_ratio=True), # Kept original scale ratio + ], + [ + dict(type='RandomFlip', prob=1.0), + dict(type='RandomFlip', prob=0.0), + ], + [ + dict(type='Pad', pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict(type='LoadAnnotations', with_bbox=True), + ], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction') + ), + ] + ] + ) +] + +# Visualization Configuration +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer' +) + +# Auto Scale Learning Rate Configuration +auto_scale_lr = dict(enable=False, base_batch_size=64) +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) \ No newline at end of file diff --git a/modules/yolox/config/yolox_x_8x8_300e_coco-EARS-fine-tuning-test.py b/modules/yolox/config/yolox_x_8x8_300e_coco-EARS-fine-tuning-test.py new file mode 100644 index 0000000..2ba9f3e --- /dev/null +++ b/modules/yolox/config/yolox_x_8x8_300e_coco-EARS-fine-tuning-test.py @@ -0,0 +1,326 @@ +# Basic Settings +default_scope = 'mmdet' +log_level = 'INFO' +load_from = 'checkpoints/yolox_x_Black_only.pth' # Changed to YOLOX-X checkpoint +resume = False + +# Environment Configuration +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0) +) + +# Dataset and Data Root Configuration +data_root = 
'data/White-lined-designs-Test-COCO/' +dataset_type = 'CocoDataset' +class_name = ['stethoscope'] +metainfo = dict( + classes=class_name, + palette=[(20, 220, 60)] +) + +# Image Size Configuration +img_scale = (640, 480) +img_scales = [(640, 480)] + +# Model Configuration +model = dict( + type='YOLOX', + data_preprocessor=dict( + type='DetDataPreprocessor', + pad_size_divisor=32, + batch_augments=[ + dict( + type='BatchSyncRandomResize', + random_size_range=(480, 480), + size_divisor=32, + interval=10 + ), + ], + ), + backbone=dict( + type='CSPDarknet', + deepen_factor=1.33, # Increased from 1.0 for YOLOX-X + widen_factor=1.25, # Increased from 1.0 for YOLOX-X + out_indices=(2, 3, 4), + use_depthwise=False, + spp_kernal_sizes=(5, 9, 13), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + neck=dict( + type='YOLOXPAFPN', + in_channels=[320, 640, 1280], # Changed according to widen_factor + out_channels=320, # Changed according to widen_factor + num_csp_blocks=4, # Increased from 3 for YOLOX-X + use_depthwise=False, + upsample_cfg=dict(scale_factor=2, mode='nearest'), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + bbox_head=dict( + type='YOLOXHead', + num_classes=1, + in_channels=320, # Changed according to neck out_channels + feat_channels=320, # Changed according to neck out_channels + strides=(8, 16, 32), + stacked_convs=2, + use_depthwise=False, + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_bbox=dict( + type='IoULoss', + mode='square', + eps=1e-16, + reduction='sum', + loss_weight=5.0 + ), + loss_obj=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0), + ), + train_cfg=dict(assigner=dict(type='SimOTAAssigner', 
center_radius=2.5)), + test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)) +) + +# Training Configuration +max_epochs = 300 +num_last_epochs = 15 +base_lr = 0.01 * 64 / 128 # Adjusted for batch size +interval = 10 + +# Optimization Configuration +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', + lr=0.01 * 64 / 128, # Adjusted for batch size + momentum=0.9, + weight_decay=0.0005, + nesterov=True + ), + paramwise_cfg=dict( + norm_decay_mult=0.0, + bias_decay_mult=0.0 + ) +) + +# Learning Rate Schedule +param_scheduler = [ + dict( + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True + ), + dict( + type='CosineAnnealingLR', + T_max=285, + eta_min=0.0005 * 64 / 128, # Adjusted for batch size + begin=5, + end=285, + by_epoch=True, + convert_to_iter_based=True + ), + dict( + type='ConstantLR', + factor=1, + begin=285, + end=300, + by_epoch=True + ), +] + +# Custom Hooks Configuration +custom_hooks = [ + dict( + type='YOLOXModeSwitchHook', + num_last_epochs=15, + priority=48 + ), + dict( + type='SyncNormHook', + priority=48 + ), + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0001, + update_buffers=True, + priority=49 + ), +] + +# Default Hooks Configuration +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook') +) + +# Data Loading Configuration +train_dataloader = dict( + batch_size=8, # Might need to reduce depending on GPU memory + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='MultiImageMixDataset', + dataset=dict( + type='CocoDataset', + data_root=data_root, + ann_file='train/annotations/instances_default.json', + 
data_prefix=dict(img='train/images/'), + metainfo=metainfo, + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='LoadAnnotations', with_bbox=True), + ], + backend_args=None + ), + pipeline=[ + dict(type='Mosaic', img_scale=(640, 480), pad_val=114.0), + dict( + type='RandomAffine', + scaling_ratio_range=(0.1, 2), + border=(-320, -240) + ), + dict( + type='MixUp', + img_scale=(640, 480), + ratio_range=(0.8, 1.6), + pad_val=114.0 + ), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', scale=(640, 480), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict( + type='FilterAnnotations', + min_gt_bbox_wh=(1, 1), + keep_empty=False + ), + dict(type='PackDetInputs') + ] + ) +) + +# Validation Configuration +val_dataloader = dict( + batch_size=8, # Might need to reduce depending on GPU memory + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='CocoDataset', + data_root=data_root, + ann_file='valid/annotations/instances_default.json', + data_prefix=dict(img='valid/images/'), + test_mode=True, + metainfo=metainfo, + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='Resize', scale=(640, 480), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor') + ) + ] + ) +) + +# Test Configuration +test_dataloader = val_dataloader + +# Metrics and Evaluation Configuration +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'valid/annotations/instances_default.json', + metric='bbox', + backend_args=None +) +test_evaluator = val_evaluator + +# Training and Testing Loops 
Configuration +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=10 +) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# Test Time Augmentation Configuration +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100) +) + +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale=(640, 480), keep_ratio=True), + dict(type='Resize', scale=(320, 240), keep_ratio=True), + dict(type='Resize', scale=(960, 720), keep_ratio=True), + ], + [ + dict(type='RandomFlip', prob=1.0), + dict(type='RandomFlip', prob=0.0), + ], + [ + dict(type='Pad', pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict(type='LoadAnnotations', with_bbox=True), + ], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction') + ), + ] + ] + ) +] + +# Visualization Configuration +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer' +) + +# Auto Scale Learning Rate Configuration +auto_scale_lr = dict(enable=False, base_batch_size=64) +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) \ No newline at end of file diff --git a/modules/yolox/config/yolox_x_8x8_300e_coco-EARS-white.py b/modules/yolox/config/yolox_x_8x8_300e_coco-EARS-white.py new file mode 100644 index 0000000..7c7bbeb --- /dev/null +++ b/modules/yolox/config/yolox_x_8x8_300e_coco-EARS-white.py @@ -0,0 +1,326 @@ +# Basic Settings +default_scope = 'mmdet' +log_level = 'INFO' +load_from = 'checkpoints/raw-weight/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth' # Changed to YOLOX-X checkpoint +resume = False + +# Environment Configuration +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + 
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0) +) + +# Dataset and Data Root Configuration +data_root = 'data/White-lined-designs-COCO/' +dataset_type = 'CocoDataset' +class_name = ['stethoscope'] +metainfo = dict( + classes=class_name, + palette=[(20, 220, 60)] +) + +# Image Size Configuration +img_scale = (640, 480) +img_scales = [(640, 480)] + +# Model Configuration +model = dict( + type='YOLOX', + data_preprocessor=dict( + type='DetDataPreprocessor', + pad_size_divisor=32, + batch_augments=[ + dict( + type='BatchSyncRandomResize', + random_size_range=(480, 480), + size_divisor=32, + interval=10 + ), + ], + ), + backbone=dict( + type='CSPDarknet', + deepen_factor=1.33, # Increased from 1.0 for YOLOX-X + widen_factor=1.25, # Increased from 1.0 for YOLOX-X + out_indices=(2, 3, 4), + use_depthwise=False, + spp_kernal_sizes=(5, 9, 13), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + neck=dict( + type='YOLOXPAFPN', + in_channels=[320, 640, 1280], # Changed according to widen_factor + out_channels=320, # Changed according to widen_factor + num_csp_blocks=4, # Increased from 3 for YOLOX-X + use_depthwise=False, + upsample_cfg=dict(scale_factor=2, mode='nearest'), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + bbox_head=dict( + type='YOLOXHead', + num_classes=1, + in_channels=320, # Changed according to neck out_channels + feat_channels=320, # Changed according to neck out_channels + strides=(8, 16, 32), + stacked_convs=2, + use_depthwise=False, + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_bbox=dict( + type='IoULoss', + mode='square', + eps=1e-16, + reduction='sum', + loss_weight=5.0 + ), + loss_obj=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0 + ), + loss_l1=dict(type='L1Loss', 
reduction='sum', loss_weight=1.0), + ), + train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)), + test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)) +) + +# Training Configuration +max_epochs = 300 +num_last_epochs = 15 +base_lr = 0.01 * 64 / 128 # Adjusted for batch size +interval = 10 + +# Optimization Configuration +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', + lr=0.01 * 64 / 128, # Adjusted for batch size + momentum=0.9, + weight_decay=0.0005, + nesterov=True + ), + paramwise_cfg=dict( + norm_decay_mult=0.0, + bias_decay_mult=0.0 + ) +) + +# Learning Rate Schedule +param_scheduler = [ + dict( + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True + ), + dict( + type='CosineAnnealingLR', + T_max=285, + eta_min=0.0005 * 64 / 128, # Adjusted for batch size + begin=5, + end=285, + by_epoch=True, + convert_to_iter_based=True + ), + dict( + type='ConstantLR', + factor=1, + begin=285, + end=300, + by_epoch=True + ), +] + +# Custom Hooks Configuration +custom_hooks = [ + dict( + type='YOLOXModeSwitchHook', + num_last_epochs=15, + priority=48 + ), + dict( + type='SyncNormHook', + priority=48 + ), + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0001, + update_buffers=True, + priority=49 + ), +] + +# Default Hooks Configuration +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook') +) + +# Data Loading Configuration +train_dataloader = dict( + batch_size=8, # Might need to reduce depending on GPU memory + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='MultiImageMixDataset', + dataset=dict( + 
type='CocoDataset', + data_root=data_root, + ann_file='train/annotations/instances_default.json', + data_prefix=dict(img='train/images/'), + metainfo=metainfo, + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='LoadAnnotations', with_bbox=True), + ], + backend_args=None + ), + pipeline=[ + dict(type='Mosaic', img_scale=(640, 480), pad_val=114.0), + dict( + type='RandomAffine', + scaling_ratio_range=(0.1, 2), + border=(-320, -240) + ), + dict( + type='MixUp', + img_scale=(640, 480), + ratio_range=(0.8, 1.6), + pad_val=114.0 + ), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', scale=(640, 480), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict( + type='FilterAnnotations', + min_gt_bbox_wh=(1, 1), + keep_empty=False + ), + dict(type='PackDetInputs') + ] + ) +) + +# Validation Configuration +val_dataloader = dict( + batch_size=8, # Might need to reduce depending on GPU memory + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='CocoDataset', + data_root=data_root, + ann_file='valid/annotations/instances_default.json', + data_prefix=dict(img='valid/images/'), + test_mode=True, + metainfo=metainfo, + pipeline=[ + dict(type='LoadImageFromFile', backend_args=None), + dict(type='Resize', scale=(640, 480), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0)) + ), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor') + ) + ] + ) +) + +# Test Configuration +test_dataloader = val_dataloader + +# Metrics and Evaluation Configuration +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'valid/annotations/instances_default.json', + 
metric='bbox', + backend_args=None +) +test_evaluator = val_evaluator + +# Training and Testing Loops Configuration +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=10 +) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# Test Time Augmentation Configuration +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100) +) + +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale=(640, 480), keep_ratio=True), + dict(type='Resize', scale=(320, 240), keep_ratio=True), + dict(type='Resize', scale=(960, 720), keep_ratio=True), + ], + [ + dict(type='RandomFlip', prob=1.0), + dict(type='RandomFlip', prob=0.0), + ], + [ + dict(type='Pad', pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict(type='LoadAnnotations', with_bbox=True), + ], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction') + ), + ] + ] + ) +] + +# Visualization Configuration +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer' +) + +# Auto Scale Learning Rate Configuration +auto_scale_lr = dict(enable=False, base_batch_size=64) +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 71488a4..9420f80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,11 @@ -python-dotenv == 1.0.1 -opencv-python == 4.10.0.84 -requests == 2.32.3 -pandas == 2.2.2 -joblib == 1.4.2 -lightgbm == 4.5.0 -xgboost == 2.1.1 -scipy == 1.9.3 -numpy == 1.24.0 -scikit-learn == 1.5.1 \ No newline at end of file +python-dotenv==1.0.1 +opencv-python==4.10.0.84 +requests==2.32.3 +pandas==2.2.2 +joblib==1.4.2 +lightgbm==4.5.0 
+xgboost==2.1.1 +scipy==1.9.3 +numpy==1.24.0 +scikit-learn==1.5.1 +matplotlib==3.9.2 \ No newline at end of file