diff --git a/configs/bisenetv1/README.md b/configs/bisenetv1/README.md index dd5bd503b2..e7a1c8dab1 100644 --- a/configs/bisenetv1/README.md +++ b/configs/bisenetv1/README.md @@ -35,8 +35,19 @@ | BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) | | BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 77.68 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) | +### COCO-Stuff 164k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | --------- | --------- | ------: | -------- | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| BiSeNetV1 (No Pretrain) | R-18-D32 | 512x512 | 160000 | - | - | 25.45 | 26.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328.log.json) | +| BiSeNetV1| R-18-D32 | 512x512 | 160000 | 6.33 | 74.24 | 28.55 | 29.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100.log.json) | +| BiSeNetV1 (No Pretrain) | R-50-D32 | 512x512 | 160000 | - | - | 29.82 | 30.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616.log.json) | +| BiSeNetV1 | R-50-D32 | 512x512 | 160000 | 9.28 | 32.60 | 34.88 | 35.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932.log.json) | +| BiSeNetV1(No Pretrain) | R-101-D32 | 512x512 | 160000 | - | - | 31.14 | 31.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147.log.json) | +| BiSeNetV1 | R-101-D32 | 512x512 | 160000 | 10.36 | 25.25 | 37.38 | 37.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220.log.json) | + Note: - `4x8`: Using 4 GPUs with 8 samples per GPU in training. -- Default setting is 4 GPUs with 4 samples per GPU in training. +- For BiSeNetV1 on Cityscapes dataset, default setting is 4 GPUs with 4 samples per GPU in training. - `No Pretrain` means the model is trained from scratch. diff --git a/configs/bisenetv1/bisenetv1.yml b/configs/bisenetv1/bisenetv1.yml index 8ea94df4bd..26a7c60044 100644 --- a/configs/bisenetv1/bisenetv1.yml +++ b/configs/bisenetv1/bisenetv1.yml @@ -3,6 +3,7 @@ Collections: Metadata: Training Data: - Cityscapes + - COCO-Stuff 164k Paper: URL: https://arxiv.org/abs/1808.00897 Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' @@ -123,3 +124,111 @@ Models: mIoU(ms+flip): 79.57 Config: configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth +- Name: bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: bisenetv1 + Metadata: + backbone: R-18-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 25.45 + mIoU(ms+flip): 26.15 + Config: configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth +- Name: bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: bisenetv1 + Metadata: + backbone: R-18-D32 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 13.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + memory (GB): 6.33 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 28.55 + mIoU(ms+flip): 29.26 + Config: configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth +- Name: bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: bisenetv1 + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 29.82 + mIoU(ms+flip): 30.33 + Config: configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth +- Name: bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: bisenetv1 + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 30.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + memory (GB): 9.28 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 34.88 + mIoU(ms+flip): 35.37 + Config: configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth +- Name: bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: bisenetv1 + Metadata: + backbone: R-101-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 31.14 + mIoU(ms+flip): 31.76 + Config: configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth +- Name: bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: bisenetv1 + Metadata: + backbone: R-101-D32 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 39.6 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + memory (GB): 10.36 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 37.38 + mIoU(ms+flip): 37.99 + Config: configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth diff --git a/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000000..c3fe21597d --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,6 @@ +_base_ = './bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000000..b1e1c3e863 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + context_channels=(512, 1024, 2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=101)), + decode_head=dict(in_channels=1024, channels=1024, num_classes=171), + auxiliary_head=[ + dict(in_channels=512, channels=256, num_classes=171), + dict(in_channels=512, channels=256, num_classes=171), + ]) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.005) diff --git a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000000..c6d93049e2 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,6 @@ +_base_ = './bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))), ) diff --git a/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000000..78d7fea629 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,13 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=171), + auxiliary_head=[ + dict(num_classes=171), + dict(num_classes=171), + ]) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.005) diff --git a/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000000..f0fea69f2f --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = './bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py' + +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000000..dbbccc69d8 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + context_channels=(512, 1024, 2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=50)), + decode_head=dict(in_channels=1024, channels=1024, num_classes=171), + auxiliary_head=[ + dict(in_channels=512, channels=256, num_classes=171), + dict(in_channels=512, channels=256, num_classes=171), + ]) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.005)