Fix #282: Support init_cfg & update deprecated configs (#365)

* update coco ref * init_cfg for dbnet * initcfg for mask_rcnn * textsnake init_cfg * fix dbnet * panet initcfg * psenet initcfg * fcenet initcfg * drrg initcfg * add init_cfg to detectors * update maskrcnn config file to support mmdet * fix init_cfg of fce_head * crnn initcfg * init_weights in training * nrtr initcfg * robust_scanner initcfg * sar init_cfg * seg init_cfg * tps_crnn init_cfg * sdmgr initcfg * ner init_cfg * fix textsnake * sdmgr initcfg * move "pretrained" to "init_cfg" for config files * Moduleslist update * fix seg * ner init_cfg * fix base * fix encode decode recognizer * revert dbnet config * fix crnn * fix base.py * fix robust_scanner * fix panet * fix test * remove redundant init_weights() in fcehead * clean up * relax mmdet version in workflow * Add dependency version check * Update mmocr/models/textdet/dense_heads/pse_head.py Co-authored-by: Hongbin Sun <[email protected]> Co-authored-by: Hongbin Sun <[email protected]>
open-mmlab · Jul 20, 2021 · 4f7270e · 4f7270e
1 parent 884755d
commit 4f7270e
Show file tree

Hide file tree

Showing 69 changed files with 408 additions and 444 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -58,7 +58,7 @@ jobs:
       - name: Install MMCV
         run: pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch${{matrix.torch}}/index.html
       - name: Install MMDet
-        run: pip install mmdet==2.11.0
+        run: pip install mmdet
       - name: Install other dependencies
         run: pip install -r requirements.txt
       - name: Build and install
@@ -139,7 +139,7 @@ jobs:
       - name: Install mmocr dependencies
         run: |
           pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch${{matrix.torch_version}}/index.html
-          pip install mmdet==2.11.0
+          pip install mmdet
           pip install -r requirements.txt
       - name: Build and install
         run: |

diff --git a/configs/_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py b/configs/_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py
@@ -1,14 +1,14 @@
 # model settings
 model = dict(
     type='OCRMaskRCNN',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
         norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         norm_eval=True,
         style='pytorch'),
     neck=dict(
@@ -91,8 +91,8 @@
             nms_across_levels=False,
             nms_pre=2000,
             nms_post=1000,
-            max_num=1000,
-            nms_thr=0.7,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
             min_bbox_size=0),
         rcnn=dict(
             assigner=dict(
@@ -116,8 +116,8 @@
             nms_across_levels=False,
             nms_pre=1000,
             nms_post=1000,
-            max_num=1000,
-            nms_thr=0.7,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
             min_bbox_size=0),
         rcnn=dict(
             score_thr=0.05,

diff --git a/configs/_base_/models/ocr_mask_rcnn_r50_fpn_ohem_poly.py b/configs/_base_/models/ocr_mask_rcnn_r50_fpn_ohem_poly.py
@@ -2,7 +2,6 @@
 model = dict(
     type='OCRMaskRCNN',
     text_repr_type='poly',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
@@ -11,6 +10,7 @@
         frozen_stages=1,
         norm_cfg=dict(type='BN', requires_grad=True),
         norm_eval=True,
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -90,8 +90,8 @@
             nms_across_levels=False,
             nms_pre=2000,
             nms_post=1000,
-            max_num=1000,
-            nms_thr=0.7,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
             min_bbox_size=0),
         rcnn=dict(
             assigner=dict(
@@ -116,8 +116,8 @@
             nms_across_levels=False,
             nms_pre=1000,
             nms_post=1000,
-            max_num=1000,
-            nms_thr=0.7,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
             min_bbox_size=0),
         rcnn=dict(
             score_thr=0.05,

diff --git a/configs/ner/bert_softmax/bert_softmax_cluener_18e.py b/configs/ner/bert_softmax/bert_softmax_cluener_18e.py
@@ -56,9 +56,13 @@
 
 model = dict(
     type='NerClassifier',
-    pretrained='https://download.openmmlab.com/mmocr/ner/'
-    'bert_softmax/bert_pretrain.pth',
-    encoder=dict(type='BertEncoder', max_position_embeddings=512),
+    encoder=dict(
+        type='BertEncoder',
+        max_position_embeddings=512,
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmocr/ner/'
+            'bert_softmax/bert_pretrain.pth')),
     decoder=dict(type='FCDecoder'),
     loss=dict(type='MaskedCrossEntropyLoss'),
     label_convertor=ner_convertor)

diff --git a/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py b/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py
@@ -3,14 +3,14 @@
 ]
 model = dict(
     type='DBNet',
-    pretrained='torchvision://resnet18',
     backbone=dict(
         type='ResNet',
         depth=18,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
         norm_eval=False,
         style='caffe'),
     neck=dict(
@@ -24,7 +24,7 @@
     test_cfg=None)
 
 dataset_type = 'IcdarDataset'
-data_root = 'data/icdar2015/'
+data_root = 'data/icdar2015'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 # for visualizing img, pls uncomment it.

diff --git a/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py b/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py
@@ -5,7 +5,6 @@
 
 model = dict(
     type='DBNet',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
@@ -16,6 +15,7 @@
         norm_eval=False,
         style='caffe',
         dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         stage_with_dcn=(False, True, True, True)),
     neck=dict(
         type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),

diff --git a/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py b/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py
@@ -4,14 +4,14 @@
 ]
 model = dict(
     type='DRRG',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         norm_eval=True,
         style='caffe'),
     neck=dict(

diff --git a/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py b/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py
@@ -1,22 +1,21 @@
 fourier_degree = 5
 model = dict(
     type='FCENet',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         norm_eval=False,
         style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[512, 1024, 2048],
         out_channels=256,
-        add_extra_convs=True,
-        extra_convs_on_inputs=False,  # use P5
+        add_extra_convs='on_output',
         num_outs=3,
         relu_before_extra_convs=True,
         act_cfg=None),

diff --git a/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py b/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py
@@ -1,7 +1,6 @@
 fourier_degree = 5
 model = dict(
     type='FCENet',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
@@ -12,13 +11,13 @@
         norm_eval=True,
         style='pytorch',
         dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         stage_with_dcn=(False, True, True, True)),
     neck=dict(
         type='FPN',
         in_channels=[512, 1024, 2048],
         out_channels=256,
-        add_extra_convs=True,
-        extra_convs_on_inputs=False,  # use P5
+        add_extra_convs='on_output',
         num_outs=3,
         relu_before_extra_convs=True,
         act_cfg=None),

diff --git a/configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py b/configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py
@@ -4,14 +4,14 @@
 ]
 model = dict(
     type='PANet',
-    pretrained='torchvision://resnet18',
     backbone=dict(
         type='ResNet',
         depth=18,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
         norm_eval=True,
         style='caffe'),
     neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),

diff --git a/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py b/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py
@@ -4,14 +4,14 @@
 ]
 model = dict(
     type='PANet',
-    pretrained='torchvision://resnet18',
     backbone=dict(
         type='ResNet',
         depth=18,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
         norm_eval=True,
         style='caffe'),
     neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),

diff --git a/configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py b/configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py
@@ -9,14 +9,14 @@
 
 model = dict(
     type='PSENet',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         norm_eval=True,
         style='caffe'),
     neck=dict(

diff --git a/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py b/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py
@@ -9,14 +9,14 @@
 
 model = dict(
     type='PSENet',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         norm_eval=True,
         style='caffe'),
     neck=dict(

diff --git a/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2017.py b/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2017.py
@@ -4,14 +4,14 @@
 ]
 model = dict(
     type='PSENet',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         norm_eval=True,
         style='caffe'),
     neck=dict(

diff --git a/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py b/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py
@@ -4,14 +4,14 @@
 ]
 model = dict(
     type='TextSnake',
-    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
         norm_eval=True,
         style='caffe'),
     neck=dict(

diff --git a/mmocr/__init__.py b/mmocr/__init__.py
@@ -1,3 +1,40 @@
+import mmcv
+import mmdet
+
 from .version import __version__, short_version
 
+
+def digit_version(version_str):  # turn a dotted version string into a list of ints
+    digit_version = []  # local accumulator; shadows the function name inside this scope
+    for x in version_str.split('.'):  # handle each dot-separated component in order
+        if x.isdigit():  # purely numeric component: keep it as one int
+            digit_version.append(int(x))
+        elif x.find('rc') != -1:  # component containing 'rc' (e.g. '0rc1'); anything else is skipped
+            patch_version = x.split('rc')  # [text before 'rc', text after 'rc']
+            digit_version.append(int(patch_version[0]) - 1)  # presumably so an rc sorts below its final release
+            digit_version.append(int(patch_version[1]))  # rc number becomes an extra trailing element
+    return digit_version  # list of ints, compared element-wise by the callers below
+
+
+mmcv_minimum_version = '1.3.8'  # inclusive lower bound (checked with >=)
+mmcv_maximum_version = '1.4.0'  # inclusive upper bound (checked with <=)
+mmcv_version = digit_version(mmcv.__version__)  # installed MMCV version as a list of ints
+
+assert (mmcv_version >= digit_version(mmcv_minimum_version)  # runs at import time; lists compare lexicographically
+        and mmcv_version <= digit_version(mmcv_maximum_version)), \
+    f'MMCV {mmcv.__version__} is incompatible with MMOCR {__version__}. ' \
+    f'Please use MMCV >= {mmcv_minimum_version}, ' \
+    f'<= {mmcv_maximum_version} instead.'
+
+mmdet_minimum_version = '2.13.0'  # inclusive lower bound (checked with >=)
+mmdet_maximum_version = '2.20.0'  # inclusive upper bound (checked with <=)
+mmdet_version = digit_version(mmdet.__version__)  # installed MMDetection version as a list of ints
+
+assert (mmdet_version >= digit_version(mmdet_minimum_version)  # same import-time gate as the MMCV check above
+        and mmdet_version <= digit_version(mmdet_maximum_version)), \
+    f'MMDetection {mmdet.__version__} is incompatible ' \
+    f'with MMOCR {__version__}. ' \
+    f'Please use MMDetection >= {mmdet_minimum_version}, ' \
+    f'<= {mmdet_maximum_version} instead.'
+
 __all__ = ['__version__', 'short_version']
diff --git a/mmocr/datasets/icdar_dataset.py b/mmocr/datasets/icdar_dataset.py
@@ -1,7 +1,7 @@
 import numpy as np
+from mmdet.datasets.api_wrappers import COCO
 from mmdet.datasets.builder import DATASETS
 from mmdet.datasets.coco import CocoDataset
-from pycocotools.coco import COCO
 
 import mmocr.utils as utils
 from mmocr.core.evaluation.hmean import eval_hmean