From c606913dbe3b539d68b140064bd3a9b5536d4df1 Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Fri, 28 Jun 2024 19:24:49 +0800
Subject: [PATCH 01/10] new config file for ade20k and maskformer

---
 mmseg/configs/_base_/datasets/ade20k.py       |  81 +++++++++
 ...ormer_r101_d32_8xb2_160k_ade20k_512x512.py |  12 ++
 ...former_r50_d32_8xb2_160k_ade20k_512x512.py | 171 ++++++++++++++++++
 ...swin_s_upernet_8xb2_160k_ade20k_512x512.py |  94 ++++++++++
 ...swin_t_upernet_8xb2_160k_ade20k_512x512.py |  94 ++++++++++
 5 files changed, 452 insertions(+)
 create mode 100644 mmseg/configs/_base_/datasets/ade20k.py
 create mode 100644 mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py
 create mode 100644 mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
 create mode 100644 mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py
 create mode 100644 mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py

diff --git a/mmseg/configs/_base_/datasets/ade20k.py b/mmseg/configs/_base_/datasets/ade20k.py
new file mode 100644
index 0000000000..4abf3aa2b8
--- /dev/null
+++ b/mmseg/configs/_base_/datasets/ade20k.py
@@ -0,0 +1,81 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.transforms.loading import LoadImageFromFile
+from mmcv.transforms.processing import (RandomFlip, RandomResize, Resize,
+                                        TestTimeAug)
+from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
+
+from mmseg.datasets.ade import ADE20KDataset
+from mmseg.datasets.transforms.formatting import PackSegInputs
+from mmseg.datasets.transforms.loading import LoadAnnotations
+from mmseg.datasets.transforms.transforms import (PhotoMetricDistortion,
+                                                  RandomCrop)
+from mmseg.evaluation import IoUMetric
+
+# dataset settings
+dataset_type = ADE20KDataset
+data_root = 'data/ade/ADEChallengeData2016'
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=LoadAnnotations, reduce_zero_label=True),
+    dict(
+        type=RandomResize,
+        scale=(2048, 512),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type=RandomCrop, crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type=RandomFlip, prob=0.5),
+    dict(type=PhotoMetricDistortion),
+    dict(type=PackSegInputs)
+]
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=Resize, scale=(2048, 512), keep_ratio=True),
+    # add loading annotation after ``Resize`` because the ground truth
+    # does not need to be resized
+    dict(type=LoadAnnotations, reduce_zero_label=True),
+    dict(type=PackSegInputs)
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type=LoadImageFromFile, backend_args=None),
+    dict(
+        type=TestTimeAug,
+        transforms=[
+            [
+                dict(type=Resize, scale_factor=r, keep_ratio=True)
+                for r in img_ratios
+            ],
+            [
+                dict(type=RandomFlip, prob=0., direction='horizontal'),
+                dict(type=RandomFlip, prob=1., direction='horizontal')
+            ], [dict(type=LoadAnnotations)], [dict(type=PackSegInputs)]
+        ])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=InfiniteSampler, shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/training', seg_map_path='annotations/training'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type=IoUMetric, iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..9b9f42568d
--- /dev/null
+++ b/mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,12 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .maskformer_r50_d32_8xb2_160k_ade20k_512x512 import *
+
+model.update(
+    dict(
+        backbone=dict(
+            depth=101,
+            init_cfg=dict(type=PretrainedInit,
+                          checkpoint='torchvision://resnet101'))))
diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..4445e2767c
--- /dev/null
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,171 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+from mmengine.model.weight_init import PretrainedInit
+from mmengine.optim.scheduler.lr_scheduler import PolyLR
+
+from torch.nn.modules.activation import ReLU
+from torch.nn.modules.batchnorm import SyncBatchNorm as SyncBN
+from torch.nn.modules.normalization import GroupNorm as GN
+from torch.optim.adamw import AdamW
+
+from mmseg.models.backbones import ResNet
+from mmseg.models.data_preprocessor import SegDataPreProcessor
+from mmseg.models.decode_heads import MaskFormerHead
+from mmdet.models.losses import CrossEntropyLoss
+from mmseg.models.segmentors import EncoderDecoder
+
+from mmdet.models.layers import PixelDecoder
+from mmdet.models.losses.focal_loss import FocalLoss
+from mmdet.models.losses.dice_loss import DiceLoss
+from mmdet.models.task_modules.assigners import (HungarianAssigner,
+                                                 ClassificationCost)
+from mmdet.models.task_modules.assigners.match_cost import (FocalLossCost,
+                                                            DiceCost)
+from mmdet.models.task_modules.samplers.mask_pseudo_sampler import MaskPseudoSampler
+
+with read_base():
+    from .._base_.datasets.ade20k import *
+    from .._base_.default_runtime import *
+    from .._base_.schedules.schedule_160k import *
+
+norm_cfg = dict(type=SyncBN, requires_grad=True)
+crop_size = (512, 512)
+data_preprocessor = dict(
+    type=SegDataPreProcessor,
+    size=crop_size,
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+# model_cfg
+num_classes = 150
+model = dict(
+    type=EncoderDecoder,
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type=ResNet,
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 1, 1),
+        strides=(1, 2, 2, 2),
+        norm_cfg=norm_cfg,
+        norm_eval=True,
+        style='pytorch',
+        contract_dilation=True,
+        init_cfg=dict(type=PretrainedInit, checkpoint='torchvision://resnet50')),
+    decode_head=dict(
+        type=MaskFormerHead,
+        in_channels=[256, 512, 1024,
+                     2048],  # input channels of pixel_decoder modules
+        feat_channels=256,
+        in_index=[0, 1, 2, 3],
+        num_classes=150,
+        out_channels=256,
+        num_queries=100,
+        pixel_decoder=dict(
+            type=PixelDecoder,
+            norm_cfg=dict(type=GN, num_groups=32),
+            act_cfg=dict(type=ReLU)),
+        enforce_decoder_input_project=False,
+        positional_encoding=dict(  # SinePositionalEncoding
+            num_feats=128, normalize=True),
+        transformer_decoder=dict(  # DetrTransformerDecoder
+            return_intermediate=True,
+            num_layers=6,
+            layer_cfg=dict(  # DetrTransformerDecoderLayer
+                self_attn_cfg=dict(  # MultiheadAttention
+                    embed_dims=256,
+                    num_heads=8,
+                    attn_drop=0.1,
+                    proj_drop=0.1,
+                    dropout_layer=None,
+                    batch_first=True),
+                cross_attn_cfg=dict(  # MultiheadAttention
+                    embed_dims=256,
+                    num_heads=8,
+                    attn_drop=0.1,
+                    proj_drop=0.1,
+                    dropout_layer=None,
+                    batch_first=True),
+                ffn_cfg=dict(
+                    embed_dims=256,
+                    feedforward_channels=2048,
+                    num_fcs=2,
+                    act_cfg=dict(type=ReLU, inplace=True),
+                    ffn_drop=0.1,
+                    dropout_layer=None,
+                    add_identity=True)),
+            init_cfg=None),
+        loss_cls=dict(
+            type=CrossEntropyLoss,
+            use_sigmoid=False,
+            loss_weight=1.0,
+            reduction='mean',
+            class_weight=[1.0] * num_classes + [0.1]),
+        loss_mask=dict(
+            type=FocalLoss,
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            reduction='mean',
+            loss_weight=20.0),
+        loss_dice=dict(
+            type=DiceLoss,
+            use_sigmoid=True,
+            activate=True,
+            reduction='mean',
+            naive_dice=True,
+            eps=1.0,
+            loss_weight=1.0),
+        train_cfg=dict(
+            assigner=dict(
+                type=HungarianAssigner,
+                match_costs=[
+                    dict(type=ClassificationCost, weight=1.0),
+                    dict(
+                        type=FocalLossCost,
+                        weight=20.0,
+                        binary_input=True),
+                    dict(
+                        type=DiceCost,
+                        weight=1.0,
+                        pred_act=True,
+                        eps=1.0)
+                ]),
+            sampler=dict(type=MaskPseudoSampler))),
+    # training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'),
+)
+# optimizer
+optimizer.update(
+    dict(
+        type=AdamW, lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001))
+optim_wrapper.merge(
+    dict(
+        _delete_=True,
+        type=OptimWrapper,
+        optimizer=optimizer,
+        clip_grad=dict(max_norm=0.01, norm_type=2),
+        paramwise_cfg=dict(custom_keys={
+            'backbone': dict(lr_mult=0.1),
+        })))
+# learning policy
+param_scheduler = [
+    dict(
+        type=PolyLR,
+        eta_min=0,
+        power=0.9,
+        begin=0,
+        end=160000,
+        by_epoch=False)
+]
+
+# In MaskFormer implementation we use batch size 2 per GPU by default
+train_dataloader.update(dict(batch_size=2, num_workers=2))
+val_dataloader.update(dict(batch_size=1, num_workers=4))
+test_dataloader = val_dataloader
+
+train_cfg.update(dict(val_interval=100))
diff --git a/mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..d6ad9792e4
--- /dev/null
+++ b/mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,94 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+from mmengine.optim.scheduler.lr_scheduler import LinearLR
+
+from torch.nn.modules.activation import GELU
+from torch.nn.modules.normalization import LayerNorm as LN
+
+from mmseg.models.backbones import SwinTransformer
+
+with read_base():
+    from .maskformer_r50_d32_8xb2_160k_ade20k_512x512 import *
+
+checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth'  # noqa
+
+backbone_norm_cfg = dict(type=LN, requires_grad=True)
+depths = [2, 2, 18, 2]
+model.merge(
+    dict(
+        backbone=dict(
+            _delete_=True,
+            type=SwinTransformer,
+            pretrain_img_size=224,
+            embed_dims=96,
+            patch_size=4,
+            window_size=7,
+            mlp_ratio=4,
+            depths=depths,
+            num_heads=[3, 6, 12, 24],
+            strides=(4, 2, 2, 2),
+            out_indices=(0, 1, 2, 3),
+            qkv_bias=True,
+            qk_scale=None,
+            patch_norm=True,
+            drop_rate=0.,
+            attn_drop_rate=0.,
+            drop_path_rate=0.3,
+            use_abs_pos_embed=False,
+            act_cfg=dict(type=GELU),
+            norm_cfg=backbone_norm_cfg,
+            init_cfg=dict(type=PretrainedInit, checkpoint=checkpoint_file)),
+        decode_head=dict(
+            type=MaskFormerHead,
+            in_channels=[96, 192, 384,
+                         768], # input channels of pixel_decoder modules
+        )))
+
+# optimizer
+optimizer.update(
+    dict(
+        type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01))
+# set all layers in backbone to lr_mult=1.0
+# set all norm layers, position_embedding,
+# query_embedding to decay_mult=0.0
+backbone_norm_multi = dict(lr_mult=1.0, decay_mult=0.0)
+backbone_embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
+embed_multi = dict(decay_mult=0.0)
+custom_keys = {
+    'backbone': dict(lr_mult=1.0),
+    'backbone.patch_embed.norm': backbone_norm_multi,
+    'backbone.norm': backbone_norm_multi,
+    'relative_position_bias_table': backbone_embed_multi,
+    'query_embed': embed_multi,
+}
+custom_keys.update({
+    f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi
+    for stage_id, num_blocks in enumerate(depths)
+    for block_id in range(num_blocks)
+})
+custom_keys.update({
+    f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi
+    for stage_id in range(len(depths) - 1)
+})
+# optimizer
+optim_wrapper.merge(
+    dict(
+        _delete_=True,
+        type=OptimWrapper,
+        optimizer=optimizer,
+        clip_grad=dict(max_norm=0.01, norm_type=2),
+        paramwise_cfg=dict(custom_keys=custom_keys)))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type=PolyLR,
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]
diff --git a/mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..1cdd393669
--- /dev/null
+++ b/mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,94 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+from mmengine.optim.scheduler.lr_scheduler import LinearLR, PolyLR
+
+from torch.nn.modules.activation import GELU
+from torch.nn.modules.normalization import LayerNorm as LN
+
+from mmseg.models.backbones import SwinTransformer
+
+with read_base():
+    from .maskformer_r50_d32_8xb2_160k_ade20k_512x512 import *
+
+checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth'  # noqa
+backbone_norm_cfg = dict(type=LN, requires_grad=True)
+depths = [2, 2, 6, 2]
+model.merge(
+    dict(
+        backbone=dict(
+            _delete_=True,
+            type=SwinTransformer,
+            pretrain_img_size=224,
+            embed_dims=96,
+            patch_size=4,
+            window_size=7,
+            mlp_ratio=4,
+            depths=depths,
+            num_heads=[3, 6, 12, 24],
+            strides=(4, 2, 2, 2),
+            out_indices=(0, 1, 2, 3),
+            qkv_bias=True,
+            qk_scale=None,
+            patch_norm=True,
+            drop_rate=0.,
+            attn_drop_rate=0.,
+            drop_path_rate=0.3,
+            use_abs_pos_embed=False,
+            act_cfg=dict(type=GELU),
+            norm_cfg=backbone_norm_cfg,
+            init_cfg=dict(type=PretrainedInit, checkpoint=checkpoint_file)),
+        decode_head=dict(
+            type=MaskFormerHead,
+            in_channels=[96, 192, 384,
+                         768], # input channels of pixel_decoder modules
+        )))
+
+# optimizer
+optimizer.update(
+    dict(
+        type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01))
+
+# set all layers in backbone to lr_mult=1.0
+# set all norm layers, position_embedding,
+# query_embedding to decay_mult=0.0
+backbone_norm_multi = dict(lr_mult=1.0, decay_mult=0.0)
+backbone_embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
+embed_multi = dict(decay_mult=0.0)
+custom_keys = {
+    'backbone': dict(lr_mult=1.0),
+    'backbone.patch_embed.norm': backbone_norm_multi,
+    'backbone.norm': backbone_norm_multi,
+    'relative_position_bias_table': backbone_embed_multi,
+    'query_embed': embed_multi,
+}
+custom_keys.update({
+    f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi
+    for stage_id, num_blocks in enumerate(depths)
+    for block_id in range(num_blocks)
+})
+custom_keys.update({
+    f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi
+    for stage_id in range(len(depths) - 1)
+})
+# optimizer
+optim_wrapper.merge(
+    dict(
+        _delete_=True,
+        type=OptimWrapper,
+        optimizer=optimizer,
+        clip_grad=dict(max_norm=0.01, norm_type=2),
+        paramwise_cfg=dict(custom_keys=custom_keys)))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type=PolyLR,
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]

From 28a93c1fb2a2486b61491e2b845a1017d1e56b64 Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Fri, 28 Jun 2024 19:45:36 +0800
Subject: [PATCH 02/10] fix lint

---
 mmseg/configs/_base_/datasets/ade20k.py       | 19 ++++----
 ...ormer_r101_d32_8xb2_160k_ade20k_512x512.py |  4 +-
 ...former_r50_d32_8xb2_160k_ade20k_512x512.py | 47 +++++++------------
 ...swin_s_upernet_8xb2_160k_ade20k_512x512.py |  9 ++--
 ...swin_t_upernet_8xb2_160k_ade20k_512x512.py |  9 ++--
 5 files changed, 33 insertions(+), 55 deletions(-)

diff --git a/mmseg/configs/_base_/datasets/ade20k.py b/mmseg/configs/_base_/datasets/ade20k.py
index 4abf3aa2b8..22e5929157 100644
--- a/mmseg/configs/_base_/datasets/ade20k.py
+++ b/mmseg/configs/_base_/datasets/ade20k.py
@@ -41,16 +41,15 @@
     dict(type=LoadImageFromFile, backend_args=None),
     dict(
         type=TestTimeAug,
-        transforms=[
-            [
-                dict(type=Resize, scale_factor=r, keep_ratio=True)
-                for r in img_ratios
-            ],
-            [
-                dict(type=RandomFlip, prob=0., direction='horizontal'),
-                dict(type=RandomFlip, prob=1., direction='horizontal')
-            ], [dict(type=LoadAnnotations)], [dict(type=PackSegInputs)]
-        ])
+        transforms=[[
+            dict(type=Resize, scale_factor=r, keep_ratio=True)
+            for r in img_ratios
+        ],
+                    [
+                        dict(type=RandomFlip, prob=0., direction='horizontal'),
+                        dict(type=RandomFlip, prob=1., direction='horizontal')
+                    ], [dict(type=LoadAnnotations)],
+                    [dict(type=PackSegInputs)]])
 ]
 train_dataloader = dict(
     batch_size=4,
diff --git a/mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py
index 9b9f42568d..f83e4e67bd 100644
--- a/mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r101_d32_8xb2_160k_ade20k_512x512.py
@@ -8,5 +8,5 @@
     dict(
         backbone=dict(
             depth=101,
-            init_cfg=dict(type=PretrainedInit,
-                          checkpoint='torchvision://resnet101'))))
+            init_cfg=dict(
+                type=PretrainedInit, checkpoint='torchvision://resnet101'))))
diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index 4445e2767c..8afeaab154 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -2,27 +2,26 @@
 from mmengine.config import read_base
 from mmengine.model.weight_init import PretrainedInit
 from mmengine.optim.scheduler.lr_scheduler import PolyLR
-
 from torch.nn.modules.activation import ReLU
 from torch.nn.modules.batchnorm import SyncBatchNorm as SyncBN
 from torch.nn.modules.normalization import GroupNorm as GN
 from torch.optim.adamw import AdamW
 
+from mmdet.models.layers import PixelDecoder
+from mmdet.models.losses import CrossEntropyLoss
+from mmdet.models.losses.dice_loss import DiceLoss
+from mmdet.models.losses.focal_loss import FocalLoss
+from mmdet.models.task_modules.assigners import (ClassificationCost,
+                                                 HungarianAssigner)
+from mmdet.models.task_modules.assigners.match_cost import (DiceCost,
+                                                            FocalLossCost)
+from mmdet.models.task_modules.samplers.mask_pseudo_sampler import \
+    MaskPseudoSampler
 from mmseg.models.backbones import ResNet
 from mmseg.models.data_preprocessor import SegDataPreProcessor
 from mmseg.models.decode_heads import MaskFormerHead
-from mmdet.models.losses import CrossEntropyLoss
 from mmseg.models.segmentors import EncoderDecoder
 
-from mmdet.models.layers import PixelDecoder
-from mmdet.models.losses.focal_loss import FocalLoss
-from mmdet.models.losses.dice_loss import DiceLoss
-from mmdet.models.task_modules.assigners import (HungarianAssigner,
-                                                 ClassificationCost)
-from mmdet.models.task_modules.assigners.match_cost import (FocalLossCost,
-                                                            DiceCost)
-from mmdet.models.task_modules.samplers.mask_pseudo_sampler import MaskPseudoSampler
-
 with read_base():
     from .._base_.datasets.ade20k import *
     from .._base_.default_runtime import *
@@ -54,7 +53,8 @@
         norm_eval=True,
         style='pytorch',
         contract_dilation=True,
-        init_cfg=dict(type=PretrainedInit, checkpoint='torchvision://resnet50')),
+        init_cfg=dict(
+            type=PretrainedInit, checkpoint='torchvision://resnet50')),
     decode_head=dict(
         type=MaskFormerHead,
         in_channels=[256, 512, 1024,
@@ -124,15 +124,8 @@
                 type=HungarianAssigner,
                 match_costs=[
                     dict(type=ClassificationCost, weight=1.0),
-                    dict(
-                        type=FocalLossCost,
-                        weight=20.0,
-                        binary_input=True),
-                    dict(
-                        type=DiceCost,
-                        weight=1.0,
-                        pred_act=True,
-                        eps=1.0)
+                    dict(type=FocalLossCost, weight=20.0, binary_input=True),
+                    dict(type=DiceCost, weight=1.0, pred_act=True, eps=1.0)
                 ]),
             sampler=dict(type=MaskPseudoSampler))),
     # training and testing settings
@@ -141,8 +134,7 @@
 )
 # optimizer
 optimizer.update(
-    dict(
-        type=AdamW, lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001))
+    dict(type=AdamW, lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001))
 optim_wrapper.merge(
     dict(
         _delete_=True,
@@ -155,17 +147,10 @@
 # learning policy
 param_scheduler = [
     dict(
-        type=PolyLR,
-        eta_min=0,
-        power=0.9,
-        begin=0,
-        end=160000,
-        by_epoch=False)
+        type=PolyLR, eta_min=0, power=0.9, begin=0, end=160000, by_epoch=False)
 ]
 
 # In MaskFormer implementation we use batch size 2 per GPU by default
 train_dataloader.update(dict(batch_size=2, num_workers=2))
 val_dataloader.update(dict(batch_size=1, num_workers=4))
 test_dataloader = val_dataloader
-
-train_cfg.update(dict(val_interval=100))
diff --git a/mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py
index d6ad9792e4..7719f160c7 100644
--- a/mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_swin_s_upernet_8xb2_160k_ade20k_512x512.py
@@ -1,7 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from mmengine.config import read_base
 from mmengine.optim.scheduler.lr_scheduler import LinearLR
-
 from torch.nn.modules.activation import GELU
 from torch.nn.modules.normalization import LayerNorm as LN
 
@@ -41,13 +40,12 @@
         decode_head=dict(
             type=MaskFormerHead,
             in_channels=[96, 192, 384,
-                         768], # input channels of pixel_decoder modules
+                         768],  # input channels of pixel_decoder modules
         )))
 
 # optimizer
 optimizer.update(
-    dict(
-        type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01))
+    dict(type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01))
 # set all layers in backbone to lr_mult=1.0
 # set all norm layers, position_embedding,
 # query_embedding to decay_mult=0.0
@@ -81,8 +79,7 @@
 
 # learning policy
 param_scheduler = [
-    dict(
-        type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
     dict(
         type=PolyLR,
         eta_min=0.0,
diff --git a/mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py
index 1cdd393669..4370fb762c 100644
--- a/mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_swin_t_upernet_8xb2_160k_ade20k_512x512.py
@@ -1,7 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from mmengine.config import read_base
 from mmengine.optim.scheduler.lr_scheduler import LinearLR, PolyLR
-
 from torch.nn.modules.activation import GELU
 from torch.nn.modules.normalization import LayerNorm as LN
 
@@ -40,13 +39,12 @@
         decode_head=dict(
             type=MaskFormerHead,
             in_channels=[96, 192, 384,
-                         768], # input channels of pixel_decoder modules
+                         768],  # input channels of pixel_decoder modules
         )))
 
 # optimizer
 optimizer.update(
-    dict(
-        type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01))
+    dict(type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01))
 
 # set all layers in backbone to lr_mult=1.0
 # set all norm layers, position_embedding,
 # query_embedding to decay_mult=0.0
@@ -81,8 +79,7 @@
 
 # learning policy
 param_scheduler = [
-    dict(
-        type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
     dict(
         type=PolyLR,
         eta_min=0.0,

From 945cc68dcbd4ac524dfc2af0367c22cbd5a4d839 Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Fri, 28 Jun 2024 20:24:03 +0800
Subject: [PATCH 03/10] Update maskformer_r50_d32_8xb2_160k_ade20k_512x512.py

---
 .../maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index 8afeaab154..e318e47e40 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -154,3 +154,4 @@
 train_dataloader.update(dict(batch_size=2, num_workers=2))
 val_dataloader.update(dict(batch_size=1, num_workers=4))
 test_dataloader = val_dataloader
+

From c1db411f6a56a956bb00a4adeca1afd92e8cab54 Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Fri, 28 Jun 2024 20:24:13 +0800
Subject: [PATCH 04/10] Update maskformer_r50_d32_8xb2_160k_ade20k_512x512.py

---
 .../maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py    | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index e318e47e40..8afeaab154 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -154,4 +154,3 @@
 train_dataloader.update(dict(batch_size=2, num_workers=2))
 val_dataloader.update(dict(batch_size=1, num_workers=4))
 test_dataloader = val_dataloader
-

From 19f3699d81b5f6a9eb56d8542f645f7264a2ef3b Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Fri, 28 Jun 2024 20:24:13 +0800
Subject: [PATCH 05/10] Update maskformer_r50_d32_8xb2_160k_ade20k_512x512.py

---
 .../maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py    | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index e318e47e40..8afeaab154 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -154,4 +154,3 @@
 train_dataloader.update(dict(batch_size=2, num_workers=2))
 val_dataloader.update(dict(batch_size=1, num_workers=4))
 test_dataloader = val_dataloader
-
From b2cff7254c7ffc8851476b75f0efcc59ca4f6d13 Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Fri, 28 Jun 2024 22:17:42 +0800
Subject: [PATCH 06/10] fix isort

---
 .../maskformer_r50_d32_8xb2_160k_ade20k_512x512.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index 8afeaab154..62942df074 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -7,10 +7,11 @@
 from torch.nn.modules.normalization import GroupNorm as GN
 from torch.optim.adamw import AdamW
 
-from mmdet.models.layers import PixelDecoder
+
 from mmdet.models.losses import CrossEntropyLoss
 from mmdet.models.losses.dice_loss import DiceLoss
 from mmdet.models.losses.focal_loss import FocalLoss
+from mmdet.models.layers import PixelDecoder
 from mmdet.models.task_modules.assigners import (ClassificationCost,
                                                  HungarianAssigner)
 from mmdet.models.task_modules.assigners.match_cost import (DiceCost,
                                                             FocalLossCost)

From 4d2a9d09d538165d45b5e870b12bad632c38088a Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Fri, 28 Jun 2024 22:32:09 +0800
Subject: [PATCH 07/10] Update maskformer_r50_d32_8xb2_160k_ade20k_512x512.py

---
 .../maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index 62942df074..3c8ccf1208 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -7,7 +7,6 @@
 from torch.nn.modules.normalization import GroupNorm as GN
 from torch.optim.adamw import AdamW
 
-
 from mmdet.models.losses import CrossEntropyLoss
 from mmdet.models.losses.dice_loss import DiceLoss
 from mmdet.models.losses.focal_loss import FocalLoss

From 2fca6a935ec02e0769fbd35f0af6745758450165 Mon Sep 17 00:00:00 2001
From: tackhwa
Date: Sat, 29 Jun 2024 02:36:28 +0800
Subject: [PATCH 08/10] fix isort issue

---
 ...kformer_r50_d32_8xb2_160k_ade20k_512x512.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index 3c8ccf1208..336a2adc87 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -1,22 +1,22 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from mmengine.config import read_base
-from mmengine.model.weight_init import PretrainedInit
-from mmengine.optim.scheduler.lr_scheduler import PolyLR
-from torch.nn.modules.activation import ReLU
-from torch.nn.modules.batchnorm import SyncBatchNorm as SyncBN
-from torch.nn.modules.normalization import GroupNorm as GN
-from torch.optim.adamw import AdamW
-
+from mmdet.models.layers import PixelDecoder
 from mmdet.models.losses import CrossEntropyLoss
 from mmdet.models.losses.dice_loss import DiceLoss
 from mmdet.models.losses.focal_loss import FocalLoss
-from mmdet.models.layers import PixelDecoder
 from mmdet.models.task_modules.assigners import (ClassificationCost,
                                                  HungarianAssigner)
 from mmdet.models.task_modules.assigners.match_cost import (DiceCost,
                                                             FocalLossCost)
 from mmdet.models.task_modules.samplers.mask_pseudo_sampler import \
     MaskPseudoSampler
+from mmengine.config import read_base
+from mmengine.model.weight_init import PretrainedInit
+from mmengine.optim.scheduler.lr_scheduler import PolyLR
+from torch.nn.modules.activation import ReLU
+from torch.nn.modules.batchnorm import SyncBatchNorm as SyncBN
+from torch.nn.modules.normalization import GroupNorm as GN
+from torch.optim.adamw import AdamW
+
 from mmseg.models.backbones import ResNet
 from mmseg.models.data_preprocessor import SegDataPreProcessor
 from mmseg.models.decode_heads import MaskFormerHead

From 494cf9ba88044348e075d59ad9f0ad5380265dc9 Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Sat, 29 Jun 2024 02:41:44 +0800
Subject: [PATCH 09/10] Update maskformer_r50_d32_8xb2_160k_ade20k_512x512.py

---
 .../maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index 336a2adc87..2cc64f84cc 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -154,3 +154,4 @@
 train_dataloader.update(dict(batch_size=2, num_workers=2))
 val_dataloader.update(dict(batch_size=1, num_workers=4))
 test_dataloader = val_dataloader
+

From aee3703c0967c0d402054286256ad14f0578018b Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Sat, 29 Jun 2024 02:41:53 +0800
Subject: [PATCH 10/10] Update maskformer_r50_d32_8xb2_160k_ade20k_512x512.py

---
 .../maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py    | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
index 2cc64f84cc..336a2adc87 100644
--- a/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
+++ b/mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py
@@ -154,4 +154,3 @@
 train_dataloader.update(dict(batch_size=2, num_workers=2))
 val_dataloader.update(dict(batch_size=1, num_workers=4))
 test_dataloader = val_dataloader
-
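
Usage note (illustrative sketch, not part of the patches above): these configs
use mmengine's new-style pure-Python format, where classes are imported
directly and base configs are pulled in via read_base() instead of registry
strings. The snippet below shows how one of the new configs would be consumed,
assuming an mmengine version with lazy-import config support and a working
mmsegmentation/mmdetection installation; the work_dir path is an arbitrary
example, and the snippet only mirrors what a standard train entry point does:

    # Illustrative only: load a new-style config and launch training.
    from mmengine.config import Config
    from mmengine.runner import Runner

    cfg = Config.fromfile(
        'mmseg/configs/maskformer/maskformer_r50_d32_8xb2_160k_ade20k_512x512.py')
    cfg.work_dir = './work_dirs/maskformer_r50_ade20k'  # logs and checkpoints

    runner = Runner.from_cfg(cfg)  # builds model, dataloaders, optim wrapper, schedulers
    runner.train()  # runs the 160k-iteration schedule from the base config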