Commit 428286c (0 parents): 48 changed files with 3,844 additions and 0 deletions.
Binary file not shown.
@@ -0,0 +1,5 @@
exps/
datasets/
.vscode/
*.png
*.pth.tar

@@ -0,0 +1,63 @@
## [CVPR 2022] Exploring Set Similarity for Dense Self-supervised Representation Learning

<p align="center">
  <img src="fig/framework.png" width="600">
</p>

This is a PyTorch implementation of [our paper](https://openreview.net/pdf?id=JAezPMehaUu):
```
@inproceedings{wang2022exploring,
  title={Exploring set similarity for dense self-supervised representation learning},
  author={Wang, Zhaoqing and Li, Qiang and Zhang, Guoxin and Wan, Pengfei and Zheng, Wen and Wang, Nannan and Gong, Mingming and Liu, Tongliang},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={16590--16599},
  year={2022}
}
```

### Requirements

- Python >= 3.7.12
- PyTorch >= 1.10.2
- torchvision >= 0.11.3

Install PyTorch and prepare the ImageNet dataset following the [official PyTorch ImageNet training code](https://github.com/pytorch/examples/tree/master/imagenet).
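
The referenced PyTorch example uses the standard `torchvision.datasets.ImageFolder` layout: a `train/` and a `val/` folder, each with one sub-directory per class. A minimal sanity check of that layout, assuming the dataset is placed under `./datasets/imagenet` (the path is an assumption, not fixed by this repo):
```
# Quick check of the ImageNet folder layout before launching pre-training.
# Assumes ./datasets/imagenet/{train,val}/<class>/<image>; adjust the root to your setup.
import torchvision.datasets as datasets

root = "./datasets/imagenet"
for split in ("train", "val"):
    ds = datasets.ImageFolder(f"{root}/{split}")
    print(f"{split}: {len(ds)} images, {len(ds.classes)} classes")
```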

For other dependencies, please run:
```
pip install -r requirements.txt
```

### Unsupervised Pre-training

This implementation only supports **multi-GPU**, **DistributedDataParallel** training, which is faster and simpler; single-GPU or DataParallel training is not supported.

To run unsupervised pre-training of a ResNet-50 model on ImageNet on an 8-GPU machine:
```
bash train_pretrain.sh
```
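
For orientation, here is a minimal sketch of the DistributedDataParallel pattern that a launcher like `train_pretrain.sh` typically wraps (process-group init, per-rank device selection, a wrapped model). It is an illustrative skeleton assuming a `torchrun`-style launch, not this repository's actual training code:
```
# Minimal DDP skeleton (illustrative only; the real entry point and arguments differ).
# Launch with e.g.: torchrun --nproc_per_node=8 ddp_sketch.py
import os

import torch
import torch.distributed as dist
import torchvision.models as models
from torch.nn.parallel import DistributedDataParallel as DDP


def main():
    dist.init_process_group(backend="nccl")      # one process per GPU
    local_rank = int(os.environ["LOCAL_RANK"])   # set by torchrun
    torch.cuda.set_device(local_rank)

    model = models.resnet50().cuda(local_rank)   # stand-in for the self-supervised model
    model = DDP(model, device_ids=[local_rank])
    optimizer = torch.optim.SGD(model.parameters(), lr=0.03,
                                momentum=0.9, weight_decay=1e-4)
    # ... build the ImageNet loader with a DistributedSampler and run the
    # pre-training loop here ...


if __name__ == "__main__":
    main()
```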

### Linear Classification

With a pre-trained model, to train a supervised linear classifier on frozen features/weights on an 8-GPU machine, run:
```
bash train_lincls.sh
```
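
Conceptually, linear evaluation loads the pre-trained backbone, freezes it, and trains only a new linear classifier on top. A hedged sketch of that setup (the checkpoint path is a placeholder, and the key prefix mirrors the converter script shipped in this commit; the actual logic lives in the script above):
```
# Illustrative linear-probe setup, not this repo's exact script.
import torch
import torchvision.models as models

model = models.resnet50(num_classes=1000)

# Placeholder checkpoint path; the "module.encoder_q.0." prefix follows
# detection/convert-pretrain-to-detectron2.py in this commit.
ckpt = torch.load("checkpoint_0199.pth.tar", map_location="cpu")
state = {k.replace("module.encoder_q.0.", ""): v
         for k, v in ckpt["state_dict"].items()
         if k.startswith("module.encoder_q.0.")}
msg = model.load_state_dict(state, strict=False)
print("missing keys:", msg.missing_keys)  # ideally only the classifier (fc.*) is missing

# Freeze everything except the randomly re-initialized linear head.
for name, param in model.named_parameters():
    param.requires_grad = name in {"fc.weight", "fc.bias"}
model.fc.weight.data.normal_(mean=0.0, std=0.01)
model.fc.bias.data.zero_()

optimizer = torch.optim.SGD([p for p in model.parameters() if p.requires_grad],
                            lr=30.0, momentum=0.9)  # typical linear-probe settings
```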

### Fine-tuning on Pascal VOC object detection

With a pre-trained model, to fine-tune a Faster R-CNN with unfrozen features/weights on an 8-GPU machine, run:
```
bash train_det.sh
```
<p align="center">
  <img src="fig/pascal.png" width="600">
</p>

### License

This project is under the MIT license. See the [LICENSE](LICENSE) file for more details.
Binary file not shown.

detection/.ipynb_checkpoints/convert-pretrain-to-detectron2-checkpoint.py (37 additions)
@@ -0,0 +1,37 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import pickle as pkl
import sys

import torch

if __name__ == "__main__":
    input = sys.argv[1]

    # Load the pre-training checkpoint and keep only the query encoder's backbone.
    obj = torch.load(input, map_location="cpu")
    obj = obj["state_dict"]

    newmodel = {}
    for k, v in obj.items():
        if not k.startswith("module.encoder_q.0."):
            continue
        old_k = k
        k = k.replace("module.encoder_q.0.", "")
        # Rename torchvision ResNet keys to detectron2's naming scheme.
        if "layer" not in k:
            k = "stem." + k
        for t in [1, 2, 3, 4]:
            k = k.replace("layer{}".format(t), "res{}".format(t + 1))
        for t in [1, 2, 3]:
            k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
        k = k.replace("downsample.0", "shortcut")
        k = k.replace("downsample.1", "shortcut.norm")
        print(old_k, "->", k)
        newmodel[k] = v.numpy()

    res = {"model": newmodel, "__author__": "MOCO", "matching_heuristics": True}

    with open(sys.argv[2], "wb") as f:
        pkl.dump(res, f)

@@ -0,0 +1,52 @@
# Object Detection on VOC 2007+2012
# Faster R-CNN (C4 backbone), 24k-iteration schedule
# for num in {1..5}
# do
# python train_net.py --config-file ./configs/pascal_voc_R_50_C4_24k_base.yaml \
#     --num-gpus 8 --dist-url tcp://127.0.0.1:1681 \
#     MODEL.WEIGHTS ./r50_SeF_Sym2x_200ep.pkl \
#     OUTPUT_DIR ./exp_voc/SemanticFocus_attention0p7_negtive0p2_nearest1_geometry0p5_Sym2x_ep200_trial_$num
# done

# Object Detection & Instance Segmentation on COCO 2017
# 1x schedule Mask R-CNN (FPN)
# python train_net.py --config-file configs/coco_R_50_FPN_1x_base.yaml \
#     --num-gpus 8 --dist-url tcp://127.0.0.1:2681 \
#     MODEL.WEIGHTS ./detection/output_densecl_200e.pkl \
#     OUTPUT_DIR ./exp_coco1x/SemanticFocus_attention0p7_negtive0p2_nearest1_geometry0p5_Sym2x_ep200
# # --eval-only --resume \

# 2x schedule Mask R-CNN (FPN)
python train_net.py --config-file ./configs/coco_R_50_FPN_2x_base.yaml \
    --num-gpus 8 --dist-url tcp://127.0.0.1:3681 \
    MODEL.WEIGHTS ./r50_SeF_Sym2x_200ep.pkl \
    OUTPUT_DIR ./exp_coco2x/SemanticFocus_attention0p7_negtive0p2_nearest1_geometry0p5_Sym2x_ep200

# MoCo v1 (200-epoch) baseline
python train_net.py --config-file ./configs/coco_R_50_FPN_2x_base.yaml \
    --num-gpus 8 --dist-url tcp://127.0.0.1:1681 \
    MODEL.WEIGHTS ./pretrain/r50_moco_v1_200ep.pkl \
    OUTPUT_DIR ./exp_coco2x/moco_v1_200ep

# MoCo v2 (200-epoch) baseline
python train_net.py --config-file ./configs/coco_R_50_FPN_2x_base.yaml \
    --num-gpus 8 --dist-url tcp://127.0.0.1:1681 \
    MODEL.WEIGHTS ./pretrain/r50_moco_v2_200ep.pkl \
    OUTPUT_DIR ./exp_coco2x/moco_v2_200ep

# DenseCL (200-epoch) baseline
python train_net.py --config-file ./configs/coco_R_50_FPN_2x_base.yaml \
    --num-gpus 8 --dist-url tcp://127.0.0.1:1681 \
    MODEL.WEIGHTS ./pretrain/r50_densecl_200ep.pkl \
    OUTPUT_DIR ./exp_coco2x/densecl_200ep

# PixPro (100-epoch) baseline
python train_net.py --config-file ./configs/coco_R_50_FPN_2x_base.yaml \
    --num-gpus 8 --dist-url tcp://127.0.0.1:1681 \
    MODEL.WEIGHTS ./pretrain/r50_pixpro_100ep.pkl \
    OUTPUT_DIR ./exp_coco2x/pixpro_100ep

# ImageNet-supervised baseline
python train_net.py --config-file ./configs/coco_R_50_FPN_2x_base.yaml \
    --num-gpus 8 --dist-url tcp://127.0.0.1:1681 \
    MODEL.WEIGHTS ./pretrain/R-50_t.pkl \
    OUTPUT_DIR ./exp_coco2x/supervised

# Random initialization (no pre-trained weights)
python train_net.py --config-file ./configs/coco_R_50_FPN_2x_base.yaml \
    --num-gpus 8 --dist-url tcp://127.0.0.1:1681 \
    MODEL.WEIGHTS None \
    OUTPUT_DIR ./exp_coco2x/supervised

@@ -0,0 +1,75 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import os

from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.evaluation import COCOEvaluator, PascalVOCDetectionEvaluator
from detectron2.layers import get_norm
from detectron2.modeling.roi_heads import ROI_HEADS_REGISTRY, Res5ROIHeads


@ROI_HEADS_REGISTRY.register()
class Res5ROIHeadsExtraNorm(Res5ROIHeads):
    """
    As described in the MoCo paper, there is an extra BN layer
    following the res5 stage.
    """
    def _build_res5_block(self, cfg):
        seq, out_channels = super()._build_res5_block(cfg)
        norm = cfg.MODEL.RESNETS.NORM
        norm = get_norm(norm, out_channels)
        seq.add_module("norm", norm)
        return seq, out_channels


class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        if "coco" in dataset_name:
            return COCOEvaluator(dataset_name, cfg, True, output_folder)
        else:
            assert "voc" in dataset_name
            return PascalVOCDetectionEvaluator(dataset_name)


def setup(args):
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg


def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        return res

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )

@@ -0,0 +1,61 @@

## MoCo: Transferring to Detection

The `train_net.py` script reproduces the object detection experiments on Pascal VOC and COCO.

### Instructions

1. Install [detectron2](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md).

1. Convert a pre-trained MoCo model to detectron2's format (a quick way to inspect the result is sketched after these steps):
```
python3 convert-pretrain-to-detectron2.py input.pth.tar output.pkl
```

1. Put the dataset under the "./datasets" directory,
following the [directory structure](https://github.com/facebookresearch/detectron2/tree/master/datasets)
required by detectron2.

1. Run training:
```
python train_net.py --config-file configs/pascal_voc_R_50_C4_24k_moco.yaml \
  --num-gpus 8 MODEL.WEIGHTS ./output.pkl
```
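
The converted `output.pkl` is a plain pickle whose `"model"` entry maps detectron2 layer names to numpy arrays (see `convert-pretrain-to-detectron2.py` in this commit). A quick inspection sketch:
```
# Inspect a converted weight file before training.
import pickle

with open("output.pkl", "rb") as f:
    ckpt = pickle.load(f)

print(ckpt["__author__"], ckpt["matching_heuristics"])
for name, arr in list(ckpt["model"].items())[:5]:
    print(name, arr.shape)  # e.g. stem.conv1.weight, res2.0.conv1.norm.weight, ...
```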

### Results

Below are the results on Pascal VOC 2007 test, fine-tuned on 2007+2012 trainval for 24k iterations using Faster R-CNN with an R50-C4 backbone:

<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">pretrain</th>
<th valign="bottom">AP50</th>
<th valign="bottom">AP</th>
<th valign="bottom">AP75</th>
<!-- TABLE BODY -->
<tr><td align="left">ImageNet-1M, supervised</td>
<td align="center">81.3</td>
<td align="center">53.5</td>
<td align="center">58.8</td>
</tr>
<tr><td align="left">ImageNet-1M, MoCo v1, 200ep</td>
<td align="center">81.5</td>
<td align="center">55.9</td>
<td align="center">62.6</td>
</tr>
<tr><td align="left">ImageNet-1M, MoCo v2, 200ep</td>
<td align="center">82.4</td>
<td align="center">57.0</td>
<td align="center">63.6</td>
</tr>
<tr><td align="left">ImageNet-1M, MoCo v2, 800ep</td>
<td align="center">82.5</td>
<td align="center">57.4</td>
<td align="center">64.0</td>
</tr>
</tbody></table>

***Note:*** These results are the means of 5 trials. Variation on Pascal VOC is large: the std of AP50, AP, and AP75 is expected to be around 0.2, 0.2, and 0.4 in most cases. We recommend running 5 trials and reporting the mean, as in the sketch below.
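
A small helper of the kind used to aggregate trials (the AP values are placeholders, not results from this repo):
```
# Aggregate per-trial detection results (placeholder numbers).
import statistics

ap50_trials = [81.3, 81.6, 81.4, 81.2, 81.5]  # hypothetical per-trial AP50 values
mean, std = statistics.mean(ap50_trials), statistics.stdev(ap50_trials)
print(f"AP50: {mean:.2f} +/- {std:.2f}")
```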

detection/configs/.ipynb_checkpoints/Base-RCNN-FPN-checkpoint.yaml (44 additions)
@@ -0,0 +1,44 @@
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
  FPN:
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 1000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
  AMP:
    ENABLED: True
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

detection/configs/.ipynb_checkpoints/coco_R_50_C4_2x-checkpoint.yaml (13 additions)
@@ -0,0 +1,13 @@
_BASE_: "Base-RCNN-C4-BN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

detection/configs/.ipynb_checkpoints/coco_R_50_C4_2x_base-checkpoint.yaml (9 additions)
@@ -0,0 +1,9 @@
_BASE_: "coco_R_50_C4_2x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"

detection/configs/.ipynb_checkpoints/coco_R_50_FPN_1x-checkpoint.yaml (24 additions)
@@ -0,0 +1,24 @@
_BASE_: "Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_CONV: 4
    NUM_FC: 1
    NORM: "SyncBN"
  ROI_MASK_HEAD:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000