diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pypi.yml
similarity index 95%
rename from .github/workflows/pythonpublish.yml
rename to .github/workflows/pypi.yml
index f167525a..4a019573 100644
--- a/.github/workflows/pythonpublish.yml
+++ b/.github/workflows/pypi.yml
@@ -4,9 +4,8 @@
name: Upload Python Package
on:
- push:
- branches:
- - master
+ schedule:
+ - cron: "0 12 * * *"
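+ # runs the package upload once per day at 12:00 UTC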
jobs:
deploy:
diff --git a/.github/workflows/build_sphix_master.yml b/.github/workflows/sphix_build_master.yml
similarity index 97%
rename from .github/workflows/build_sphix_master.yml
rename to .github/workflows/sphix_build_master.yml
index adc53018..2bc2462d 100644
--- a/.github/workflows/build_sphix_master.yml
+++ b/.github/workflows/sphix_build_master.yml
@@ -47,5 +47,5 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
- build_dir: "docs/build/html/*"
+ build_dir: docs/build/html/
target_branch: gh-pages
diff --git a/.github/workflows/sphix_build_pr.yml b/.github/workflows/sphix_build_pr.yml
index 5b171b9a..4cc3850e 100644
--- a/.github/workflows/sphix_build_pr.yml
+++ b/.github/workflows/sphix_build_pr.yml
@@ -9,16 +9,21 @@ on:
jobs:
docs:
- runs-on: ubuntu-latest
+ runs-on: self-hosted
steps:
- uses: actions/checkout@v2
- uses: seanmiddleditch/gha-setup-ninja@master
-
- - name: Set up Python
- uses: actions/setup-python@v1
+
+ - name: Set PR Number
+ uses: actions/github-script@0.3.0
with:
- python-version: 3.7
-
+ github-token: ${{github.token}}
+ script: |
+ const core = require('@actions/core')
+ const prNumber = context.payload.number;
+ core.exportVariable('PULL_NUMBER', prNumber);
+ core.exportVariable("PATH", "/home/ubuntu/anaconda3/bin:/usr/local/bin:/usr/bin/:/bin:$PATH")
+
- name: Install dependencies
run: |
python -m pip install --upgrade pip
@@ -39,35 +44,11 @@ jobs:
cd docs/
make html
touch build/html/.nojekyll
-
- - name: Set PR Number
- uses: actions/github-script@0.3.0
- with:
- github-token: ${{github.token}}
- script: |
- const core = require('@actions/core')
- const prNumber = context.payload.number;
- core.exportVariable('PULL_NUMBER', prNumber);
-
-
- # https://github.com/marketplace/actions/github-pages
- - name: Deploy
- if: success()
- uses: jakejarvis/s3-sync-action@master
- with:
- args: --acl public-read --follow-symlinks --delete
- env:
- AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
- AWS_REGION: ${{ secrets.AWS_REGION }}
- DEST_DIR: "${{ secrets.DEST_DIR }}/${PULL_NUMBER}"
- SOURCE_DIR: 'docs/build/html/'
-
+ aws s3 sync build/html/ s3://hangzh/encoding/docs/${{ env.PULL_NUMBER }}/ --acl public-read --follow-symlinks --delete
- name: Comment
if: success()
uses: thollander/actions-comment-pull-request@master
with:
- message: "The docs are uploaded and can be previewed at http://${{ secrets.AWS_S3_BUCKET }}.s3.amazonaws.com/${{ secrets.DEST_DIR }}/${{ env.PULL_NUMBER }}/index.html"
+ message: "The docs are uploaded and can be previewed at http://hangzh.s3.amazonaws.com/encoding/docs/${{ env.PULL_NUMBER }}/index.html"
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index fc428944..cc0b4412 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,6 @@ docs/src/
docs/html/
encoding/_ext/
encoding.egg-info/
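+# C++/CUDA extension build artifacts produced by the ninja JIT build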
+*.o
+*.so
+*.ninja*
diff --git a/README.md b/README.md
index 06851187..a4c9856e 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,8 @@
[![PyPI](https://img.shields.io/pypi/v/torch-encoding.svg)](https://pypi.python.org/pypi/torch-encoding)
-[![PyPI Pre-release](https://img.shields.io/badge/pypi--prerelease-v1.1.0-ff69b4.svg)](https://pypi.org/project/torch-encoding/#history)
+[![PyPI Pre-release](https://img.shields.io/badge/pypi--prerelease-v1.2.0-ff69b4.svg)](https://pypi.org/project/torch-encoding/#history)
[![Upload Python Package](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Upload%20Python%20Package/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions)
[![Downloads](http://pepy.tech/badge/torch-encoding)](http://pepy.tech/project/torch-encoding)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Build Docs](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Build%20Docs/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions)
# PyTorch-Encoding
@@ -11,10 +12,23 @@ created by [Hang Zhang](http://hangzh.com/)
- Please visit the [**Docs**](http://hangzh.com/PyTorch-Encoding/) for detailed instructions on installation and usage.
-- Please visit the [link](http://hangzh.com/PyTorch-Encoding/experiments/segmentation.html) to examples of semantic segmentation.
+- Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/imagenet.html) for image classification models.
+
+- Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/segmentation.html) for semantic segmentation models.
## Citations
+**ResNeSt: Split-Attention Networks** [[arXiv]]()
+ [Hang Zhang](http://hangzh.com/), Chongruo Wu, Zhongyue Zhang, Yi Zhu, Zhi Zhang, Haibin Lin, Yue Sun, Tong He, Jonas Muller, R. Manmatha, Mu Li and Alex Smola
+```
+@article{zhang2020resnest,
+title={ResNeSt: Split-Attention Networks},
+author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
+journal={arXiv preprint},
+year={2020}
+}
+```
+
**Context Encoding for Semantic Segmentation** [[arXiv]](https://arxiv.org/pdf/1803.08904.pdf)
[Hang Zhang](http://hangzh.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html), [Jianping Shi](http://shijianping.me/), [Zhongyue Zhang](http://zhongyuezhang.com/), [Xiaogang Wang](http://www.ee.cuhk.edu.hk/~xgwang/), [Ambrish Tyagi](https://scholar.google.com/citations?user=GaSWCoUAAAAJ&hl=en), [Amit Agrawal](http://www.amitkagrawal.com/)
```
diff --git a/docs/source/index.rst b/docs/source/index.rst
index fb8a9567..4cfcadcb 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,16 +13,23 @@ An optimized PyTorch package with CUDA backend.
.. toctree::
:glob:
:maxdepth: 1
- :caption: Notes
+ :caption: Installation
notes/*
.. toctree::
:glob:
:maxdepth: 1
- :caption: Experiment Systems
+ :caption: Model Zoo
- experiments/*
+ model_zoo/*
+
+.. toctree::
+ :glob:
+ :maxdepth: 1
+ :caption: Other Tutorials
+
+ tutorials/*
.. toctree::
:maxdepth: 1
@@ -30,7 +37,6 @@ An optimized PyTorch package with CUDA backend.
nn
parallel
- models
utils
Indices and tables
diff --git a/docs/source/model_zoo/imagenet.rst b/docs/source/model_zoo/imagenet.rst
new file mode 100644
index 00000000..982c7ca2
--- /dev/null
+++ b/docs/source/model_zoo/imagenet.rst
@@ -0,0 +1,83 @@
+Image Classification
+====================
+
+Install Package
+---------------
+
+- Clone the GitHub repo::
+
+ git clone https://github.com/zhanghang1989/PyTorch-Encoding
+
+- Install PyTorch Encoding (if not yet installed). Please follow the installation guide `Installing PyTorch Encoding <../notes/compile.html>`_.
+
+Get Pre-trained Model
+---------------------
+
+.. hint::
+ How to get a pretrained model, for example ``ResNeSt50``::
+
+ model = encoding.models.get_model('ResNeSt50', pretrained=True)
+
+ After clicking ``cmd`` in the table, the command for training the model can be found below the table.
+
+.. role:: raw-html(raw)
+ :format: html
+
+
+ResNeSt
+~~~~~~~
+
+.. note::
+ The provided models were trained using MXNet Gluon; this PyTorch implementation is slightly worse than the original implementation.
+
+=============================== ============== ============== =========================================================================================================
+Model crop-size Acc Command
+=============================== ============== ============== =========================================================================================================
+ResNeSt-50 224 81.03 :raw-html:`cmd`
+ResNeSt-101 256 82.83 :raw-html:`cmd`
+ResNeSt-200 320 83.84 :raw-html:`cmd`
+ResNeSt-269 416 84.54 :raw-html:`cmd`
+=============================== ============== ============== =========================================================================================================
+
+.. raw:: html
+
+
+ # change the rank for worker node
+ python train_dist.py --dataset imagenet --model resnest50 --lr-scheduler cos --epochs 270 --checkname resnest50 --lr 0.025 --batch-size 64 --dist-url tcp://MASTER:NODE:IP:ADDRESS:23456 --world-size 4 --label-smoothing 0.1 --mixup 0.2 --no-bn-wd --last-gamma --warmup-epochs 5 --rand-aug --rank 0
+
+
+
+ # change the rank for worker node
+ python train_dist.py --dataset imagenet --model resnest101 --lr-scheduler cos --epochs 270 --checkname resnest101 --lr 0.025 --batch-size 64 --dist-url tcp://MASTER:NODE:IP:ADDRESS:23456 --world-size 4 --label-smoothing 0.1 --mixup 0.2 --no-bn-wd --last-gamma --warmup-epochs 5 --rand-aug --rank 0
+
+
+
+ # change the rank for worker node
+ python train_dist.py --dataset imagenet --model resnest200 --lr-scheduler cos --epochs 270 --checkname resnest200 --lr 0.0125 --batch-size 32 --dist-url tcp://MASTER:NODE:IP:ADDRESS:23456 --world-size 8 --label-smoothing 0.1 --mixup 0.2 --no-bn-wd --last-gamma --warmup-epochs 5 --rand-aug --crop-size 256 --rank 0
+
+
+
+ # change the rank for worker node
+ python train_dist.py --dataset imagenet --model resnest269 --lr-scheduler cos --epochs 270 --checkname resnest269 --lr 0.0125 --batch-size 32 --dist-url tcp://MASTER:NODE:IP:ADDRESS:23456 --world-size 8 --label-smoothing 0.1 --mixup 0.2 --no-bn-wd --last-gamma --warmup-epochs 5 --rand-aug --crop-size 320 --rank 0
+
+
+Test Pretrained
+~~~~~~~~~~~~~~~
+
+- Prepare the datasets by downloading the data into the current folder and then running the scripts in the ``scripts/`` folder::
+
+ python scripts/prepare_imagenet.py --data-dir ./
+
+- The test script is in the ``experiments/recognition/`` folder. To evaluate a model (using MS),
+ for example ``ResNeSt50``::
+
+ python test.py --dataset imagenet --model-zoo ResNeSt50 --crop-size 224 --eval
+
+Train Your Own Model
+--------------------
+
+- Prepare the datasets by downloading the data into the current folder and then running the scripts in the ``scripts/`` folder::
+
+ python scripts/prepare_imagenet.py --data-dir ./
+
+- The training script is in the ``experiments/recognition/`` folder. Commands for reproducing pre-trained models can be found in the table.
diff --git a/docs/source/experiments/segmentation.rst b/docs/source/model_zoo/segmentation.rst
similarity index 59%
rename from docs/source/experiments/segmentation.rst
rename to docs/source/model_zoo/segmentation.rst
index cbd74b23..72bc3646 100644
--- a/docs/source/experiments/segmentation.rst
+++ b/docs/source/model_zoo/segmentation.rst
@@ -1,5 +1,5 @@
-Context Encoding for Semantic Segmentation (EncNet)
-===================================================
+Semantic Segmentation
+=====================
Install Package
---------------
@@ -29,31 +29,52 @@ Get Pre-trained Model
:format: html
-.. tabularcolumns:: |>{\centering\arraybackslash}\X{4}{5}|>{\raggedleft\arraybackslash}\X{1}{5}|
+ResNeSt Backbone Models
+-----------------------
-============================================================================== ============== ============== =============================================================================================
+============================================================================== ============== ============== =========================================================================================================
Model pixAcc mIoU Command
-============================================================================== ============== ============== =============================================================================================
-Encnet_ResNet50_PContext 79.2% 51.0% :raw-html:`cmd`
-EncNet_ResNet101_PContext 80.7% 54.1% :raw-html:`cmd`
-EncNet_ResNet50_ADE 80.1% 41.5% :raw-html:`cmd`
-EncNet_ResNet101_ADE 81.3% 44.4% :raw-html:`cmd`
-EncNet_ResNet101_VOC N/A 85.9% :raw-html:`cmd`
-============================================================================== ============== ============== =============================================================================================
-
+============================================================================== ============== ============== =========================================================================================================
+FCN_ResNeSt50_ADE xx.xx% xx.xx% :raw-html:`cmd`
+DeepLabV3_ResNeSt50_ADE 81.17% 45.12% :raw-html:`cmd`
+DeepLabV3_ResNeSt101_ADE 82.07% 46.91% :raw-html:`cmd`
+============================================================================== ============== ============== =========================================================================================================
.. raw:: html
-
- CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model FCN
+
+ python train.py --dataset ade20k --model fcn --aux --backbone resnest50 --batch-size 2
-
- CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model EncNet --aux --se-loss
+
+ python train.py --dataset ADE20K --model deeplab --aux --backbone resnest50
-
- CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model EncNet --aux --se-loss --backbone resnet101
+
+ python train.py --dataset ADE20K --model deeplab --aux --backbone resnest101
+
+
+
+ResNet Backbone Models
+----------------------
+
+ADE20K Dataset
+~~~~~~~~~~~~~~
+
+============================================================================== ================= ============== =============================================================================================
+Model pixAcc mIoU Command
+============================================================================== ================= ============== =============================================================================================
+FCN_ResNet50_ADE 78.7% 38.5% :raw-html:`cmd`
+EncNet_ResNet50_ADE 80.1% 41.5% :raw-html:`cmd`
+EncNet_ResNet101_ADE 81.3% 44.4% :raw-html:`cmd`
+EncNet_ResNet101_VOC N/A 85.9% :raw-html:`cmd`
+============================================================================== ================= ============== =============================================================================================
+
+
+.. raw:: html
+
+
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model FCN
@@ -64,7 +85,6 @@ EncNet_ResNet101_VOC
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss
-
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101 --base-size 640 --crop-size 576
@@ -77,6 +97,33 @@ EncNet_ResNet101_VOC
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params --ft
+
+
+Pascal Context Dataset
+~~~~~~~~~~~~~~~~~~~~~~
+
+============================================================================== ================= ============== =============================================================================================
+Model pixAcc mIoU Command
+============================================================================== ================= ============== =============================================================================================
+Encnet_ResNet50_PContext 79.2% 51.0% :raw-html:`cmd`
+EncNet_ResNet101_PContext 80.7% 54.1% :raw-html:`cmd`
+============================================================================== ================= ============== =============================================================================================
+
+.. raw:: html
+
+
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model FCN
+
+
+
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model EncNet --aux --se-loss
+
+
+
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model EncNet --aux --se-loss --backbone resnet101
+
+
+
Test Pretrained
~~~~~~~~~~~~~~~
@@ -127,13 +174,13 @@ Quick Demo
Train Your Own Model
--------------------
-- Prepare the datasets by runing the scripts in the ``scripts/`` folder, for example preparing ``PASCAL Context`` dataset::
+- Prepare the datasets by running the scripts in the ``scripts/`` folder, for example preparing the ``ADE20K`` dataset::
- python scripts/prepare_pcontext.py
+ python scripts/prepare_ade20k.py
- The training script is in the ``experiments/segmentation/`` folder, example training command::
- CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset pcontext --model encnet --aux --se-loss
+ python train_dist.py --dataset ade20k --model encnet --aux --se-loss
- For detailed training options, please run ``python train.py -h``. Commands for reproducing pre-trained models can be found in the table.
@@ -142,7 +189,7 @@ Train Your Own Model
training correctness purpose. For evaluating the pretrained model on validation set using MS,
please use the command::
- CUDA_VISIBLE_DEVICES=0,1,2,3 python test.py --dataset pcontext --model encnet --aux --se-loss --resume mycheckpoint --eval
+ python test.py --dataset pcontext --model encnet --aux --se-loss --resume mycheckpoint --eval
Citation
--------
diff --git a/docs/source/models.rst b/docs/source/models.rst
deleted file mode 100644
index 0ec7a81f..00000000
--- a/docs/source/models.rst
+++ /dev/null
@@ -1,52 +0,0 @@
-.. role:: hidden
- :class: hidden-section
-
-encoding.models
-================
-
-.. automodule:: encoding.models.resnet
-.. currentmodule:: encoding.models.resnet
-
-ResNet
-------
-
-We provide correct dilated pre-trained ResNet and DenseNet (stride of 8) for semantic segmentation.
-For dilation of DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d`.
-All provided models have been verified.
-
-.. note::
- This code is provided together with the paper
-
- * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
-
-
-:hidden:`ResNet`
-~~~~~~~~~~~~~~~~
-
-.. autoclass:: ResNet
- :members:
-
-:hidden:`resnet18`
-~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: resnet18
-
-:hidden:`resnet34`
-~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: resnet34
-
-:hidden:`resnet50`
-~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: resnet50
-
-:hidden:`resnet101`
-~~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: resnet101
-
-:hidden:`resnet152`
-~~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: resnet152
diff --git a/docs/source/nn.rst b/docs/source/nn.rst
index 7310ac15..ef888c90 100644
--- a/docs/source/nn.rst
+++ b/docs/source/nn.rst
@@ -14,6 +14,12 @@ Customized NN modules in Encoding Package. For Synchronized Cross-GPU Batch Norm
.. autoclass:: Encoding
:members:
+:hidden:`DistSyncBatchNorm`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: DistSyncBatchNorm
+ :members:
+
:hidden:`SyncBatchNorm`
~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/notes/compile.rst b/docs/source/notes/compile.rst
index 40d570d8..3a3e305b 100644
--- a/docs/source/notes/compile.rst
+++ b/docs/source/notes/compile.rst
@@ -5,17 +5,41 @@ Install and Citations
Installation
------------
- * Install PyTorch 1.0 by following the `PyTorch instructions `_.
+ * Install PyTorch 1.4.0 by following the `PyTorch instructions `_.
* PIP Install::
- pip install torch-encoding
+ pip install torch-encoding --pre
* Install from source::
git clone https://github.com/zhanghang1989/PyTorch-Encoding && cd PyTorch-Encoding
python setup.py install
+
+Detailed Steps
+--------------
+
+This tutorial is a successful setup example for an AWS EC2 P3 instance with Ubuntu 16.04 and CUDA 10.
+We cannot guarantee it will work for all machines, but the steps should be similar.
+We assume CUDA and cuDNN are already successfully installed; otherwise, please refer to other tutorials.
+
+ * Install Anaconda from the `link `_ .
+
+ * Install ninja::
+
+ wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
+ sudo unzip ninja-linux.zip -d /usr/local/bin/
+ sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force
+
+ * Install PyTorch::
+
+ conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
+
+ * Install this package::
+
+ pip install torch-encoding --pre
+
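+ * A quick sanity check of the installation (an optional, illustrative one-liner; it simply
+   imports the package and prints the PyTorch version it finds)::
+
+ python -c "import torch, encoding; print(torch.__version__)"
+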
Citations
---------
diff --git a/docs/source/parallel.rst b/docs/source/parallel.rst
index 99738483..3f66b73b 100644
--- a/docs/source/parallel.rst
+++ b/docs/source/parallel.rst
@@ -7,10 +7,7 @@ encoding.parallel
- Current PyTorch DataParallel Table does not support multi-gpu loss calculation, which makes the gpu memory usage very imbalanced. We address this issue here by doing DataParallel for Model & Criterion.
.. note::
- This code is provided together with the paper
-
- * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
-
+ Deprecated. Please use torch.nn.parallel.DistributedDataParallel with :class:`encoding.nn.DistSyncBatchNorm` for the best performance.
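+
+ A minimal sketch of the recommended replacement (illustrative only; ``net`` and ``local_rank``
+ are placeholders, and the process group is assumed to be initialized via
+ ``torch.distributed.init_process_group``)::
+
+ import torch
+ import encoding
+
+ # use the distributed-friendly synchronized BatchNorm inside the model
+ net = torch.nn.Sequential(
+ torch.nn.Conv2d(3, 64, 3, padding=1),
+ encoding.nn.DistSyncBatchNorm(64),
+ torch.nn.ReLU(inplace=True)).cuda()
+ net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[local_rank])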
.. automodule:: encoding.parallel
.. currentmodule:: encoding.parallel
diff --git a/docs/source/experiments/cifar.rst b/docs/source/tutorials/cifar.rst
similarity index 100%
rename from docs/source/experiments/cifar.rst
rename to docs/source/tutorials/cifar.rst
diff --git a/docs/source/experiments/style.rst b/docs/source/tutorials/style.rst
similarity index 100%
rename from docs/source/experiments/style.rst
rename to docs/source/tutorials/style.rst
diff --git a/docs/source/notes/syncbn.rst b/docs/source/tutorials/syncbn.rst
similarity index 100%
rename from docs/source/notes/syncbn.rst
rename to docs/source/tutorials/syncbn.rst
diff --git a/docs/source/experiments/texture.rst b/docs/source/tutorials/texture.rst
similarity index 100%
rename from docs/source/experiments/texture.rst
rename to docs/source/tutorials/texture.rst
diff --git a/docs/source/utils.rst b/docs/source/utils.rst
index a5bd538f..0c5d9648 100644
--- a/docs/source/utils.rst
+++ b/docs/source/utils.rst
@@ -20,6 +20,12 @@ Useful util functions.
.. autofunction:: save_checkpoint
+:hidden:`SegmentationMetric`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: SegmentationMetric
+ :members:
+
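+.. hint::
+ An illustrative usage sketch (``outputs`` and ``targets`` are assumed to be batched
+ prediction and ground-truth tensors; 150 is the ADE20K class count)::
+
+ metric = encoding.utils.SegmentationMetric(nclass=150)
+ metric.update(targets, outputs)
+ pixAcc, mIoU = metric.get()
+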
:hidden:`batch_pix_accuracy`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/encoding/datasets/ade20k.py b/encoding/datasets/ade20k.py
index 56b172d1..0d732ae6 100644
--- a/encoding/datasets/ade20k.py
+++ b/encoding/datasets/ade20k.py
@@ -57,38 +57,43 @@ def __getitem__(self, index):
mask = self.target_transform(mask)
return img, mask
- def _sync_transform(self, img, mask):
- # random mirror
- if random.random() < 0.5:
- img = img.transpose(Image.FLIP_LEFT_RIGHT)
- mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
- crop_size = self.crop_size
- w, h = img.size
- long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.5))
- if h > w:
- oh = long_size
- ow = int(1.0 * w * long_size / h + 0.5)
- short_size = ow
- else:
- ow = long_size
- oh = int(1.0 * h * long_size / w + 0.5)
- short_size = oh
- img = img.resize((ow, oh), Image.BILINEAR)
- mask = mask.resize((ow, oh), Image.NEAREST)
- # pad crop
- if short_size < crop_size:
- padh = crop_size - oh if oh < crop_size else 0
- padw = crop_size - ow if ow < crop_size else 0
- img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
- mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
- # random crop crop_size
- w, h = img.size
- x1 = random.randint(0, w - crop_size)
- y1 = random.randint(0, h - crop_size)
- img = img.crop((x1, y1, x1+crop_size, y1+crop_size))
- mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size))
- # final transform
- return img, self._mask_transform(mask)
+ #def _sync_transform(self, img, mask):
+ # # random mirror
+ # if random.random() < 0.5:
+ # img = img.transpose(Image.FLIP_LEFT_RIGHT)
+ # mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
+ # crop_size = self.crop_size
+ # # random scale (short edge)
+ # w, h = img.size
+ # long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
+ # if h > w:
+ # oh = long_size
+ # ow = int(1.0 * w * long_size / h + 0.5)
+ # short_size = ow
+ # else:
+ # ow = long_size
+ # oh = int(1.0 * h * long_size / w + 0.5)
+ # short_size = oh
+ # img = img.resize((ow, oh), Image.BILINEAR)
+ # mask = mask.resize((ow, oh), Image.NEAREST)
+ # # pad crop
+ # if short_size < crop_size:
+ # padh = crop_size - oh if oh < crop_size else 0
+ # padw = crop_size - ow if ow < crop_size else 0
+ # img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
+ # mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
+ # # random crop crop_size
+ # w, h = img.size
+ # x1 = random.randint(0, w - crop_size)
+ # y1 = random.randint(0, h - crop_size)
+ # img = img.crop((x1, y1, x1+crop_size, y1+crop_size))
+ # mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size))
+ # # gaussian blur as in PSP
+ # if random.random() < 0.5:
+ # img = img.filter(ImageFilter.GaussianBlur(
+ # radius=random.random()))
+ # # final transform
+ # return img, self._mask_transform(mask)
def _mask_transform(self, mask):
target = np.array(mask).astype('int64') - 1
diff --git a/encoding/datasets/base.py b/encoding/datasets/base.py
index 52b38fd5..bd64f784 100644
--- a/encoding/datasets/base.py
+++ b/encoding/datasets/base.py
@@ -67,6 +67,7 @@ def _sync_transform(self, img, mask):
img = img.transpose(Image.FLIP_LEFT_RIGHT)
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
crop_size = self.crop_size
+ # random scale (short edge)
w, h = img.size
long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
if h > w:
diff --git a/encoding/datasets/cityscapes.py b/encoding/datasets/cityscapes.py
index 8e3b2842..aa19cc77 100644
--- a/encoding/datasets/cityscapes.py
+++ b/encoding/datasets/cityscapes.py
@@ -19,7 +19,7 @@
class CitySegmentation(BaseDataset):
NUM_CLASS = 19
- def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
+ def __init__(self, root=os.path.expanduser('~/.encoding/data/citys/'), split='train',
mode=None, transform=None, target_transform=None, **kwargs):
super(CitySegmentation, self).__init__(
root, split, mode, transform, target_transform, **kwargs)
diff --git a/encoding/datasets/cityscapescoarse.py b/encoding/datasets/cityscapescoarse.py
new file mode 100644
index 00000000..72bdd7aa
--- /dev/null
+++ b/encoding/datasets/cityscapescoarse.py
@@ -0,0 +1,158 @@
+###########################################################################
+# Created by: Hang Zhang
+# Email: zhang.hang@rutgers.edu
+# Copyright (c) 2017
+###########################################################################
+
+import os
+import sys
+import numpy as np
+import random
+import math
+from tqdm import tqdm
+from PIL import Image, ImageOps, ImageFilter
+
+import torch
+import torch.utils.data as data
+import torchvision.transforms as transform
+
+class Segmentation(data.Dataset):
+ BASE_DIR = 'cityscapes'
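+ # coarse-annotation Cityscapes: images are read from leftImg8bit/train_extra (train)
+ # or leftImg8bit/val (val), masks from gtCoarse/train_extra or gtFine/val,
+ # paired by get_city_pairs() below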
+
+ def __init__(self, data_folder, mode='train', transform=None,
+ target_transform=None):
+ self.root = os.path.join(data_folder, self.BASE_DIR)
+ self.transform = transform
+ self.target_transform = target_transform
+ self.mode = mode
+ self.images, self.masks = get_city_pairs(self.root, mode)
+ assert (len(self.images) == len(self.masks))
+ if len(self.images) == 0:
+ raise(RuntimeError("Found 0 images in subfolders of: \
+ " + self.root + "\n"))
+
+ def __getitem__(self, index):
+ img = Image.open(self.images[index]).convert('RGB')
+ if self.mode == 'test':
+ if self.transform is not None:
+ img = self.transform(img)
+ return img, os.path.basename(self.images[index])
+
+ mask = Image.open(self.masks[index])#.convert("P")
+ mask = np.array(mask)
+ mask += 1
+ mask[mask==256] = 0
+ mask = Image.fromarray(mask)
+ # synchronized transform
+ if self.mode == 'train':
+ img, mask = self._sync_transform(img, mask)
+ elif self.mode == 'val':
+ img, mask = self._val_sync_transform(img, mask)
+ else:
+ raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode))
+
+
+ # general resize, normalize and toTensor
+ if self.transform is not None:
+ #print("transform for input")
+ img = self.transform(img)
+ if self.target_transform is not None:
+ #print("transform for label")
+ mask = self.target_transform(mask)
+
+ return img, mask
+
+ def __len__(self):
+ return len(self.images)
+
+ def _val_sync_transform(self, img, mask):
+ """
+ synchronized transformation
+ """
+ outsize = 720
+ short = outsize
+ w, h = img.size
+ if w > h:
+ oh = short
+ ow = int(1.0 * w * oh / h)
+ else:
+ ow = short
+ oh = int(1.0 * h * ow / w)
+ img = img.resize((ow, oh), Image.BILINEAR)
+ mask = mask.resize((ow, oh), Image.NEAREST)
+ # center crop
+ w, h = img.size
+ x1 = int(round((w - outsize) / 2.))
+ y1 = int(round((h - outsize) / 2.))
+ img = img.crop((x1, y1, x1+outsize, y1+outsize))
+ mask = mask.crop((x1, y1, x1+outsize, y1+outsize))
+
+ return img, mask
+
+ def _sync_transform(self, img, mask):
+ # random mirror
+ if random.random() < 0.5:
+ img = img.transpose(Image.FLIP_LEFT_RIGHT)
+ mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
+ base_size = 2048
+ crop_size = 720
+ # random scale (long edge from 0.5x to 2.0x of base_size)
+ long_size = random.randint(int(base_size*0.5), int(base_size*2.0))
+ w, h = img.size
+ if h > w:
+ oh = long_size
+ ow = int(1.0 * w * oh / h)
+ short_size = ow
+ else:
+ ow = long_size
+ oh = int(1.0 * h * ow / w)
+ short_size = oh
+ img = img.resize((ow, oh), Image.BILINEAR)
+ mask = mask.resize((ow, oh), Image.NEAREST)
+ # random rotate -10~10, mask using NN rotate
+ deg = random.uniform(-10,10)
+ img = img.rotate(deg, resample=Image.BILINEAR)
+ mask = mask.rotate(deg, resample=Image.NEAREST)
+ # pad crop
+ if short_size < crop_size:
+ padh = crop_size - oh if oh < crop_size else 0
+ padw = crop_size - ow if ow < crop_size else 0
+ img = ImageOps.expand(img, border=(0,0,padw,padh), fill=0)
+ mask = ImageOps.expand(mask, border=(0,0,padw,padh), fill=0)
+ # random crop to crop_size
+ w, h = img.size
+ x1 = random.randint(0, w - crop_size)
+ y1 = random.randint(0, h - crop_size)
+ img = img.crop((x1, y1, x1+crop_size, y1+crop_size))
+ mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size))
+ # gaussian blur as in PSP ?
+ if random.random() < 0.5:
+ img = img.filter(ImageFilter.GaussianBlur(
+ radius=random.random()))
+ return img, mask
+
+
+def get_city_pairs(folder, mode='train'):
+ img_paths = []
+ mask_paths = []
+ if mode=='train':
+ img_folder = os.path.join(folder, 'leftImg8bit/train_extra')
+ mask_folder = os.path.join(folder, 'gtCoarse/train_extra')
+ else:
+ img_folder = os.path.join(folder, 'leftImg8bit/val')
+ mask_folder = os.path.join(folder, 'gtFine/val')
+ for root, directories, files in os.walk(img_folder):
+ for filename in files:
+ basename, extension = os.path.splitext(filename)
+ if filename.endswith(".png"):
+ imgpath = os.path.join(root, filename)
+ foldername = os.path.basename(os.path.dirname(imgpath))
+ maskname = filename.replace('leftImg8bit','gtCoarse_trainIds')
+ maskpath = os.path.join(mask_folder, foldername, maskname)
+ if os.path.isfile(imgpath) and os.path.isfile(maskpath):
+ img_paths.append(imgpath)
+ mask_paths.append(maskpath)
+ else:
+ print('cannot find the mask or image:', imgpath, maskpath)
+
+ return img_paths, mask_paths
diff --git a/encoding/datasets/folder.py b/encoding/datasets/folder.py
new file mode 100644
index 00000000..95fab1f8
--- /dev/null
+++ b/encoding/datasets/folder.py
@@ -0,0 +1,70 @@
+###########################################################################
+# Created by: Hang Zhang
+# Email: zhang.hang@rutgers.edu
+# Copyright (c) 2017
+###########################################################################
+
+import os
+import sys
+import numpy as np
+import random
+import math
+
+import torch.utils.data as data
+from PIL import Image, ImageOps
+
+import torch.utils.data as data
+import torchvision.transforms as transform
+from .dataset import ToLabel
+
+class FolderLoader(data.Dataset):
+ def __init__(self, root, transform=None):
+ self.root = root
+ self.transform = transform
+ self.images = get_folder_images(root)
+ if len(self.images) == 0:
+ raise(RuntimeError("Found 0 images in subfolders of: \
+ " + self.root + "\n"))
+
+ def __getitem__(self, index):
+ img = Image.open(self.images[index]).convert('RGB')
+ if self.transform is not None:
+ img = self.transform(img)
+ return img, os.path.basename(self.images[index])
+
+ def __len__(self):
+ return len(self.images)
+
+
+def get_folder_images(img_folder):
+ img_paths = []
+ for filename in os.listdir(img_folder):
+ if filename.endswith(".jpg"):
+ imgpath = os.path.join(img_folder, filename)
+ img_paths.append(imgpath)
+ return img_paths
+
+
+
+class Dataloder():
+ def __init__(self, args):
+ # the data augmentation is implemented as part of the dataloader
+ assert(args.test)
+ input_transform = transform.Compose([
+ transform.ToTensor(),
+ transform.Normalize(args.mean, args.std)])
+ args.test_batch_size = 1
+
+ assert(args.test_folder is not None)
+ print('loading the data from: {}'.format(args.test_folder))
+
+ testset = FolderLoader(args.test_folder, input_transform)
+ kwargs = {'num_workers': args.workers, 'pin_memory': True} \
+ if args.cuda else {}
+ self.trainloader = None
+ self.testloader = data.DataLoader(testset,
+ batch_size=args.test_batch_size,
+ shuffle=False, **kwargs)
+
+ def getloader(self):
+ return self.trainloader, self.testloader
diff --git a/encoding/datasets/hpw18.py b/encoding/datasets/hpw18.py
new file mode 100644
index 00000000..f9f89d6e
--- /dev/null
+++ b/encoding/datasets/hpw18.py
@@ -0,0 +1,104 @@
+# created by: Sean Liu
+# Amazon Lab 126
+from __future__ import print_function
+
+import errno
+import hashlib
+import os
+import sys
+import tarfile
+import numpy as np
+import random
+import math
+
+import torch.utils.data as data
+import PIL
+from PIL import Image, ImageOps
+
+from six.moves import urllib
+
+
+class Segmentation_HPW18(data.Dataset):
+ CLASSES = [
+ 'background', 'hat', 'hair', 'sunglasses', 'upper-clothes',
+ 'skirt', 'pants', 'dress', 'belt', 'left-shoe', 'right-shoe',
+ 'face', 'left-leg', 'right-leg', 'left-arm', 'right-arm', 'bag',
+ 'scarf'
+ ]
+
+ URL = "/cvdata1/lliuqian/humanParsingDataset"
+ FILE = "hpw18.tar.gz"
+ MD5 = ''
+ BASE_DIR = ''
+
+ def __init__(self,
+ root,
+ train=True,
+ transform=None,
+ target_transform=None,
+ download=False):
+ self.root = root
+ _hpw18_root = os.path.join(self.root, self.BASE_DIR)
+ _mask_dir = os.path.join(_hpw18_root, 'SegmentationClassAug_256x384')
+ _image_dir = os.path.join(_hpw18_root, 'JPEGImages_256x384')
+ self.transform = transform
+ self.target_transform = target_transform
+ self.train = train
+
+ if download:
+ self._download()
+
+ # train/val/test splits are pre-cut
+ _splits_dir = _hpw18_root
+ _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_train.txt')
+ if not self.train:
+ _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_val.txt')
+
+ print("reading from ", _split_f)
+
+ self.images = []
+ self.masks = []
+ with open(os.path.join(_split_f), "r") as lines:
+ for line in lines:
+ s = line.split()
+ _image = s[0] # image absolute path
+ _mask = s[1] # mask absolute path
+ assert os.path.isfile(_image)
+ assert os.path.isfile(_mask)
+ self.images.append(_image)
+ self.masks.append(_mask)
+ assert (len(self.images) == len(self.masks))
+
+ def __getitem__(self, index):
+ _img = Image.open(self.images[index]).convert('RGB')
+ _timg = Image.open(self.masks[index])
+ _target = np.array(_timg, dtype=np.uint8)
+ _target = Image.fromarray(_target)
+
+ # synchronized transform
+ if self.train:
+ _img, _target = self._sync_transform( _img, _target)
+
+ # general resize, normalize and toTensor
+ if self.transform is not None:
+ _img = self.transform(_img)
+ if self.target_transform is not None:
+ _target = self.target_transform(_target)
+
+ return _img, _target
+
+ def __len__(self):
+ return len(self.images)
+
+ def _sync_transform(self, img, mask):
+ # random rotate -10~10
+ deg = random.uniform(-10,10)
+ img = img.rotate(deg)
+ mask = mask.rotate(deg, PIL.Image.NEAREST)
+
+ return img, mask
+
+if __name__ == '__main__':
+ hpw18 = Segmentation_HPW18('/cvdata1/lliuqian/', train=True)
+ print(hpw18[0])
+ print (len(hpw18))
diff --git a/encoding/datasets/imagenet.py b/encoding/datasets/imagenet.py
index 78b375f3..fbe5ff53 100644
--- a/encoding/datasets/imagenet.py
+++ b/encoding/datasets/imagenet.py
@@ -11,6 +11,9 @@
import torchvision.transforms as transforms
import torchvision.datasets as datasets
+import warnings
+warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)
+
class ImageNetDataset(datasets.ImageFolder):
BASE_DIR = "ILSVRC2012"
def __init__(self, root=os.path.expanduser('~/.encoding/data'), transform=None,
diff --git a/encoding/functions/__init__.py b/encoding/functions/__init__.py
index 9113739d..fd19e561 100644
--- a/encoding/functions/__init__.py
+++ b/encoding/functions/__init__.py
@@ -1,4 +1,6 @@
"""Encoding Autograd Fuctions"""
from .encoding import *
from .syncbn import *
+from .dist_syncbn import dist_syncbatchnorm
from .customize import *
+from .rectify import *
diff --git a/encoding/functions/dist_syncbn.py b/encoding/functions/dist_syncbn.py
new file mode 100644
index 00000000..5c0297df
--- /dev/null
+++ b/encoding/functions/dist_syncbn.py
@@ -0,0 +1,106 @@
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## Email: zhanghang0704@gmail.com
+## Copyright (c) 2020
+##
+## LICENSE file in the root directory of this source tree
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import torch
+from torch.autograd.function import Function
+from .. import lib
+
+__all__ = ['dist_syncbatchnorm']
+
+class dist_syncbatchnorm_(Function):
+ @staticmethod
+ def forward(ctx, x, gamma, beta, running_mean, running_var, eps, momentum, training, process_group):
+ x = x.contiguous()
+ ctx.training = training
+ ctx.momentum = momentum
+ ctx.eps = eps
+ ctx.process_group = process_group
+
+ if not ctx.training:
+ _ex, _var = running_mean.contiguous(), running_var.contiguous()
+ _exs = _var + _ex ** 2
+ if x.is_cuda:
+ y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+ else:
+ y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+ ctx.save_for_backward(x, _ex, _exs, gamma, beta)
+ return y
+
+ size = x.numel() // x.size(1)
+ if size == 1:
+ raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
+
+ if x.is_cuda:
+ _ex, _exs = lib.gpu.expectation_forward(x)
+ else:
+ raise NotImplementedError
+
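+ # sum the per-GPU statistics E[x] and E[x^2] plus a per-process counter across the
+ # group; dividing by the summed counter (the world size) below averages them,
+ # assuming every process sees the same per-GPU batch size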
+ count = torch.Tensor([1]).to(x.device)
+ count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True)
+ _ex_all_reduce = torch.distributed.all_reduce(_ex, group=process_group, async_op=True)
+ _exs_all_reduce = torch.distributed.all_reduce(_exs, group=process_group, async_op=True)
+
+ count_all_reduce.wait()
+ _ex_all_reduce.wait()
+ _exs_all_reduce.wait()
+
+ _ex = _ex / count
+ _exs = _exs / count
+
+ # Update running stats
+ _var = _exs - _ex ** 2
+ running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * _ex)
+ running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * _var)
+
+ # Mark in-place modified tensors
+ ctx.mark_dirty(running_mean, running_var)
+
+ # BN forward + activation
+ if x.is_cuda:
+ y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+ else:
+ y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+
+ ctx.save_for_backward(x, _ex, _exs, gamma, beta)
+ return y
+
+ @staticmethod
+ def backward(ctx, dz):
+ x, _ex, _exs, gamma, beta = ctx.saved_tensors
+ dz = dz.contiguous()
+
+ # BN backward
+ if dz.is_cuda:
+ dx, _dex, _dexs, dgamma, dbeta = \
+ lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
+ else:
+ raise NotImplementedError
+
+ if ctx.training:
+ process_group = ctx.process_group
+ count = torch.Tensor([1]).to(x.device)
+ count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True)
+ _dex_all_reduce = torch.distributed.all_reduce(_dex, group=process_group, async_op=True)
+ _dexs_all_reduce = torch.distributed.all_reduce(_dexs, group=process_group, async_op=True)
+
+ count_all_reduce.wait()
+ _dex_all_reduce.wait()
+ _dexs_all_reduce.wait()
+
+ _dex = _dex / count
+ _dexs = _dexs / count
+
+ if x.is_cuda:
+ dx_ = lib.gpu.expectation_backward(x, _dex, _dexs)
+ else:
+ raise NotImplementedError
+ dx = dx + dx_
+
+ return dx, dgamma, dbeta, None, None, None, None, None, None
+
+dist_syncbatchnorm = dist_syncbatchnorm_.apply
diff --git a/encoding/functions/rectify.py b/encoding/functions/rectify.py
new file mode 100644
index 00000000..75eb175b
--- /dev/null
+++ b/encoding/functions/rectify.py
@@ -0,0 +1,47 @@
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## Email: zhanghang0704@gmail.com
+## Copyright (c) 2020
+##
+## LICENSE file in the root directory of this source tree
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+"""Rectify function"""
+import torch
+from torch.autograd import Function
+
+from .. import lib
+
+__all__ = ['rectify']
+
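+# rectify rescales convolution/pooling outputs near the borders so that positions whose
+# receptive field overlaps the zero padding are weighted by the number of valid input
+# taps rather than the full (dilated) kernel area; see conv_rectify in lib.cpu / lib.gpu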
+class _rectify(Function):
+ @staticmethod
+ def forward(ctx, y, x, kernel_size, stride, padding, dilation, average):
+ ctx.save_for_backward(x)
+ # assuming kernel_size is 3
+ kernel_size = [k + 2 * (d - 1) for k,d in zip(kernel_size, dilation)]
+ ctx.kernel_size = kernel_size
+ ctx.stride = stride
+ ctx.padding = padding
+ ctx.dilation = dilation
+ ctx.average = average
+ if x.is_cuda:
+ lib.gpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
+ else:
+ lib.cpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
+ ctx.mark_dirty(y)
+ return y
+
+ @staticmethod
+ def backward(ctx, grad_y):
+ x, = ctx.saved_variables
+ if x.is_cuda:
+ lib.gpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
+ ctx.padding, ctx.dilation, ctx.average)
+ else:
+ lib.cpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
+ ctx.padding, ctx.dilation, ctx.average)
+ ctx.mark_dirty(grad_y)
+ return grad_y, None, None, None, None, None, None
+
+rectify = _rectify.apply
diff --git a/encoding/functions/syncbn.py b/encoding/functions/syncbn.py
index e989f4a1..43154ef3 100644
--- a/encoding/functions/syncbn.py
+++ b/encoding/functions/syncbn.py
@@ -10,7 +10,7 @@
"""Synchronized Cross-GPU Batch Normalization functions"""
import torch
import torch.cuda.comm as comm
-from torch.autograd import Variable, Function
+from torch.autograd import Function
from torch.autograd.function import once_differentiable
from .. import lib
diff --git a/encoding/lib/__init__.py b/encoding/lib/__init__.py
index 5675dfc6..6d703776 100644
--- a/encoding/lib/__init__.py
+++ b/encoding/lib/__init__.py
@@ -12,6 +12,7 @@
os.path.join(cpu_path, 'syncbn_cpu.cpp'),
os.path.join(cpu_path, 'roi_align_cpu.cpp'),
os.path.join(cpu_path, 'nms_cpu.cpp'),
+ os.path.join(cpu_path, 'rectify_cpu.cpp'),
], build_directory=cpu_path, verbose=False)
if torch.cuda.is_available():
@@ -19,9 +20,9 @@
os.path.join(gpu_path, 'operator.cpp'),
os.path.join(gpu_path, 'activation_kernel.cu'),
os.path.join(gpu_path, 'encoding_kernel.cu'),
- os.path.join(gpu_path, 'encodingv2_kernel.cu'),
os.path.join(gpu_path, 'syncbn_kernel.cu'),
os.path.join(gpu_path, 'roi_align_kernel.cu'),
os.path.join(gpu_path, 'nms_kernel.cu'),
+ os.path.join(gpu_path, 'rectify_cuda.cu'),
], extra_cuda_cflags=["--expt-extended-lambda"],
build_directory=gpu_path, verbose=False)
diff --git a/encoding/lib/cpu/operator.cpp b/encoding/lib/cpu/operator.cpp
index a74bd991..9c5b28c2 100644
--- a/encoding/lib/cpu/operator.cpp
+++ b/encoding/lib/cpu/operator.cpp
@@ -12,4 +12,5 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("sumsquare_forward", &Sum_Square_Forward_CPU, "SumSqu forward (CPU)");
m.def("sumsquare_backward", &Sum_Square_Backward_CPU, "SumSqu backward (CPU)");
m.def("non_max_suppression", &Non_Max_Suppression_CPU, "NMS (CPU)");
+ m.def("conv_rectify", &CONV_RECTIFY_CPU, "Convolution Rectifier (CPU)");
}
diff --git a/encoding/lib/cpu/operator.h b/encoding/lib/cpu/operator.h
index 4e1a48c5..a0642155 100644
--- a/encoding/lib/cpu/operator.h
+++ b/encoding/lib/cpu/operator.h
@@ -72,3 +72,12 @@ std::vector Non_Max_Suppression_CPU(
const at::Tensor& input,
const at::Tensor& scores,
double thresh);
+
+void CONV_RECTIFY_CPU(
+ at::Tensor& output,
+ const at::Tensor& input,
+ at::IntArrayRef kernel_size,
+ at::IntArrayRef stride,
+ at::IntArrayRef padding,
+ at::IntArrayRef dilation,
+ bool avg_mode);
diff --git a/encoding/lib/cpu/rectify_cpu.cpp b/encoding/lib/cpu/rectify_cpu.cpp
new file mode 100644
index 00000000..2f464fc1
--- /dev/null
+++ b/encoding/lib/cpu/rectify_cpu.cpp
@@ -0,0 +1,234 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+template <typename dest_t, typename src_t>
+static inline dest_t safe_downcast(src_t v)
+{
+ TORCH_CHECK(std::numeric_limits<dest_t>::min() <= v && v <= std::numeric_limits<dest_t>::max(),
+ "integer out of range");
+
+ return static_cast<dest_t>(v);
+}
+
+
+template <typename T>
+static inline T pooling_output_shape_pad_lr(
+ T inputSize, T kernelSize, T pad_l, T pad_r, T stride, T dilation,
+ bool ceil_mode) {
+ T outputSize = div_rtn<T>(
+ inputSize + pad_l + pad_r - dilation * (kernelSize - 1) - 1 +
+ (ceil_mode ? stride - 1 : 0), stride) + 1;
+ if (pad_l) {
+ // ensure that the last pooling starts inside the image
+ // needed to avoid problems in ceil mode
+ if ((outputSize - 1) * stride >= inputSize + pad_l)
+ --outputSize;
+ }
+ return outputSize;
+}
+
+template <typename T>
+static inline T pooling_output_shape(
+ T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
+ return pooling_output_shape_pad_lr(
+ inputSize, kernelSize, pad, pad, stride, dilation, ceil_mode);
+}
+
+static inline void pool2d_shape_check(
+ const at::Tensor& input,
+ int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW,
+ int64_t nInputPlane,
+ int64_t inputHeight, int64_t inputWidth,
+ int64_t outputHeight, int64_t outputWidth)
+{
+ const int64_t ndim = input.ndimension();
+ const int64_t nOutputPlane = nInputPlane;
+
+ TORCH_CHECK(kW > 0 && kH > 0,
+ "kernel size should be greater than zero, but got ",
+ "kH: ", kH, " kW: ", kW);
+ TORCH_CHECK(dW > 0 && dH > 0,
+ "stride should be greater than zero, but got "
+ "dH: ", dH, " dW: ", dW);
+ TORCH_CHECK(dilationH > 0 && dilationW > 0,
+ "dilation should be greater than zero, but got ",
+ "dilationH: ", dilationH, " dilationW: ", dilationW);
+
+ TORCH_CHECK(input.numel() > 0 && (ndim == 3 || ndim == 4),
+ "non-empty 3D or 4D input tensor expected but got ndim: ", ndim);
+ //TORCH_CHECK(kW/2 >= padW && kH/2 >= padH,
+ // "pad should be smaller than half of kernel size, but got ",
+ // "padW = ", padW, ", padH = ", padH, ", kW = ", kW, ", kH = ", kH);
+
+ TORCH_CHECK(outputWidth >= 1 && outputHeight >= 1,
+ "Given input size: (",
+ nInputPlane, "x", inputHeight, "x", inputWidth, "). ",
+ "Calculated output size: (",
+ nOutputPlane, "x", outputHeight, "x", outputWidth, "). ",
+ "Output size is too small");
+}
+
+
+template <typename scalar_t>
+static void conv_rectify_cpu_frame(
+ scalar_t *output_data,
+ int64_t nbatch,
+ int64_t nInputPlane,
+ int64_t inputWidth,
+ int64_t inputHeight,
+ int64_t outputWidth,
+ int64_t outputHeight,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ const int dilation_h,
+ const int dilation_w,
+ bool average_mode) {
+ //at::parallel_for(0, nInputPlane, 0, [&](int64_t start, int64_t end) {
+ for (int64_t k = 0; k < nInputPlane; k++) {
+ int64_t p;
+ for(p = 0; p < nbatch; p++)
+ {
+ int64_t xx, yy;
+ /* For all output pixels... */
+ scalar_t *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
+ //int64_t i;
+
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ /* Compute the mean of the input image... */
+ int64_t hstart = yy * dH - padH;
+ int64_t wstart = xx * dW - padW;
+ int64_t hend = std::min(hstart + kH, inputHeight + padH);
+ int64_t wend = std::min(wstart + kW, inputWidth + padW);
+ //int pool_size = (hend - hstart) * (wend - wstart);
+ int pool_size = ((kH - 1) / dilation_h + 1) * ((kW - 1) / dilation_w + 1);
+ hstart = std::max(hstart, (int64_t) 0);
+ wstart = std::max(wstart, (int64_t) 0);
+ hend = std::min(hend, inputHeight);
+ wend = std::min(wend, inputWidth);
+ int hcount = int(((hend - hstart) - 1) / dilation_h + 1);
+ int wcount = int(((wend - wstart) - 1) / dilation_w + 1);
+
+ scalar_t mul_factor;
+ if (average_mode) {
+ mul_factor = scalar_t(1.0) / (hcount * wcount);
+ }
+ else {
+ mul_factor = scalar_t(1.0) * pool_size / (hcount * wcount);
+ }
+ *ptr_output++ *= mul_factor;
+ }
+ }
+ }
+ }
+ //});
+}
+
+void conv_rectify_cpu_tempalte(
+ at::Tensor &output,
+ const at::Tensor &input_,
+ at::IntArrayRef kernel_size,
+ at::IntArrayRef stride,
+ at::IntArrayRef padding,
+ at::IntArrayRef dilation,
+ bool average_mode)
+{
+ // #20866, #22032: Guarantee this for the official C++ API?
+ TORCH_CHECK(kernel_size.size() == 1 || kernel_size.size() == 2,
+ "conv_rectify: kernel_size must either be a single int, or a tuple of two ints");
+ const int kH = safe_downcast<int, int64_t>(kernel_size[0]);
+ const int kW = kernel_size.size() == 1 ? kH : safe_downcast<int, int64_t>(kernel_size[1]);
+
+ TORCH_CHECK(stride.empty() || stride.size() == 1 || stride.size() == 2,
+ "conv_rectify: stride must either be omitted, a single int, or a tuple of two ints");
+ const int dH = stride.empty() ? kH : safe_downcast<int, int64_t>(stride[0]);
+ const int dW = stride.empty() ? kW :
+ stride.size() == 1 ? dH : safe_downcast<int, int64_t>(stride[1]);
+
+ TORCH_CHECK(padding.size() == 1 || padding.size() == 2,
+ "conv_rectify: padding must either be a single int, or a tuple of two ints");
+ const int padH = safe_downcast<int, int64_t>(padding[0]);
+ const int padW = padding.size() == 1 ? padH : safe_downcast<int, int64_t>(padding[1]);
+
+ TORCH_CHECK(dilation.size() == 1 || dilation.size() == 2,
+ "rectify: dilation must either be a single int, or a tuple of two ints");
+ const int dilationH = safe_downcast<int, int64_t>(dilation[0]);
+ const int dilationW = dilation.size() == 1 ? dilationH : safe_downcast<int, int64_t>(dilation[1]);
+
+ TORCH_CHECK((input_.ndimension() == 3 || input_.ndimension() == 4),
+ "non-empty 2D or 3D (batch mode) tensor expected for input");
+
+ /* sizes */
+ const int64_t nbatch = input_.ndimension() == 4 ? input_.size(-4) : 1;
+ const int64_t nInputPlane = input_.size(-3);
+ const int64_t inputHeight = input_.size(-2);
+ const int64_t inputWidth = input_.size(-1);
+
+ //const int64_t outputHeight = pooling_output_shape(inputHeight, kH, padH, dH, dilationH, false);
+ //const int64_t outputWidth = pooling_output_shape(inputWidth, kW, padW, dW, dilationW, false);
+ const int64_t outputHeight = output.size(-2);
+ const int64_t outputWidth = output.size(-1);
+
+ pool2d_shape_check(
+ input_,
+ kH, kW, dH, dW, padH, padW, dilationH, dilationW,
+ nInputPlane,
+ inputHeight, inputWidth,
+ outputHeight, outputWidth);
+
+ TORCH_CHECK(output.is_contiguous(), "conv_rectify: output must be contiguous");
+
+ at::Tensor input = input_.contiguous();
+
+ AT_DISPATCH_FLOATING_TYPES(input.type(), "conv_rectify_cpu_frame", ([&] {
+ scalar_t *output_data = output.data_ptr<scalar_t>();
+ conv_rectify_cpu_frame<scalar_t>(
+ output_data,
+ nbatch,
+ nInputPlane,
+ inputWidth, inputHeight,
+ outputWidth, outputHeight,
+ kW, kH,
+ dW, dH,
+ padW, padH,
+ dilationH,
+ dilationW,
+ average_mode);
+ }
+ ));
+}
+
+void CONV_RECTIFY_CPU(
+ at::Tensor& output,
+ const at::Tensor& input,
+ at::IntArrayRef kernel_size,
+ at::IntArrayRef stride,
+ at::IntArrayRef padding,
+ at::IntArrayRef dilation,
+ bool average) {
+ //at::Tensor output = at::empty({0}, input.options());
+ conv_rectify_cpu_tempalte(
+ output,
+ input,
+ kernel_size,
+ stride,
+ padding,
+ dilation,
+ average);
+}
+
+
diff --git a/encoding/lib/gpu/activation_kernel.cu b/encoding/lib/gpu/activation_kernel.cu
index d58118d7..c14bcada 100644
--- a/encoding/lib/gpu/activation_kernel.cu
+++ b/encoding/lib/gpu/activation_kernel.cu
@@ -1,12 +1,11 @@
-#include
#include
#include
-// #include
+#include
#include
-
#include
#include
+#include "common.h"
namespace {
diff --git a/encoding/lib/gpu/encodingv2_kernel.cu b/encoding/lib/gpu/encodingv2_kernel.cu
deleted file mode 100644
index 068c2bd5..00000000
--- a/encoding/lib/gpu/encodingv2_kernel.cu
+++ /dev/null
@@ -1,427 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-
-#include "common.h"
-#include "device_tensor.h"
-
-namespace {
-
-template
-struct KD2Op {
- __device__ KD2Op(DeviceTensor x,
- DeviceTensor c,
- DeviceTensor std) : X(x), C(c), STD(std) {}
- __device__ __forceinline__ Acctype operator()(int b, int i, int k, int d)
- {
- DType r = (X[b][i][d] - C[k][d]) / STD[k][d];
- return ScalarConvert::to(r * r);
- }
- DeviceTensor X;
- DeviceTensor C;
- DeviceTensor STD;
-};
-
-template
-__global__ void Encoding_Dist_Forward_kernel (
- DeviceTensor KD,
- DeviceTensor X,
- DeviceTensor C,
- DeviceTensor STD) {
- /* declarations of the variables */
- int b, k, i, D;
- /* Get the index and channels */
- b = blockIdx.z;
- k = blockIdx.x;
- i = blockIdx.y;
- D = X.getSize(2);
- /* main operation */
- KD2Op g(X, C, STD);
- KD[b][i][k] = reduceD(g, b, i, k, D);;
-}
-
-template
-struct EncGradXOp {
- __device__ EncGradXOp(
- DeviceTensor gkd,
- DeviceTensor x,
- DeviceTensor c,
- DeviceTensor std) : GKD(gkd), X(x), C(c), STD(std) {}
- // DeviceTensor s, S(s)
- __device__ __forceinline__ Acctype operator()(int b, int i, int k, int d) {
- return ScalarConvert::to(
- 2 * GKD[b][i][k] * (X[b][i][d] - C[k][d]) /
- (STD[k][d] * STD[k][d]));
- }
- DeviceTensor GKD;
- DeviceTensor X;
- DeviceTensor C;
- DeviceTensor STD;
- // DeviceTensor S;
-};
-
-template
-__global__ void Encoding_GradX_kernel (
- DeviceTensor GKD,
- DeviceTensor GX,
- DeviceTensor X,
- DeviceTensor C,
- DeviceTensor STD) {
- // DeviceTensor S
- /* declarations of the variables */
- int b, d, i, K;
- /* Get the index and channels */
- b = blockIdx.z;
- i = blockIdx.y;
- d = blockIdx.x;
- K = C.getSize(0);
- /* main operation */
- EncGradXOp g(GKD, X, C, STD);
- GX[b][i][d] = reduceK(g, b, i, d, K);
-}
-
-template
-struct EncGradSTDOp {
- __device__ EncGradSTDOp(
- DeviceTensor gkd,
- DeviceTensor x,
- DeviceTensor c,
- DeviceTensor std) : GKD(gkd), X(x), C(c), STD(std) {}
- // DeviceTensor s, S(s)
- __device__ __forceinline__ Acctype operator()(int b, int i, int k, int d) {
- return ScalarConvert::to(
- -2 * GKD[b][i][k] * (X[b][i][d] - C[k][d]) *
- (X[b][i][d] - C[k][d]) / (STD[k][d] * STD[k][d] * STD[k][d]));
- }
- DeviceTensor GKD;
- DeviceTensor X;
- DeviceTensor C;
- DeviceTensor STD;
- // DeviceTensor S;
-};
-
-template
-__global__ void Encoding_GradCSTD_kernel (
- DeviceTensor GKD,
- DeviceTensor GC,
- DeviceTensor GSTD,
- DeviceTensor X,
- DeviceTensor C,
- DeviceTensor STD) {
- /* declarations of the variables */
- int k, d, B, N;
- /* Get the index and channels */
- d = blockIdx.x;
- k = blockIdx.y;
- B = X.getSize(0);
- N = X.getSize(1);
- /* main operation */
- EncGradXOp g1(GKD, X, C, STD);
- EncGradSTDOp g2(GKD, X, C, STD);
- GC[k][d] = -reduceBN(g1, k, d, B, N);
- GSTD[k][d] += reduceBN(g2, k, d, B, N);
-}
-
-template
-struct EncGradSTDXOp {
- __device__ EncGradSTDXOp(
- DeviceTensor gstd,
- DeviceTensor x,
- DeviceTensor c,
- DeviceTensor std) : GSTD(gstd), X(x), C(c), STD(std) {}
- __device__ __forceinline__ Acctype operator()(int b, int i, int k, int d) {
- return ScalarConvert::to(
- GSTD[k][d] * (X[b][i][d] - C[k][d]) / STD[k][d]);
- }
- DeviceTensor GSTD;
- DeviceTensor X;
- DeviceTensor C;
- DeviceTensor STD;
-};
-
-template
-__global__ void Encoding_GradSTDX_kernel (
- DeviceTensor GSTD,
- DeviceTensor GX,
- DeviceTensor X,
- DeviceTensor C,
- DeviceTensor STD,
- int N) {
- /* declarations of the variables */
- int b, d, i, K;
- /* Get the index and channels */
- b = blockIdx.z;
- i = blockIdx.y;
- d = blockIdx.x;
- K = C.getSize(0);
- /* main operation */
- EncGradSTDXOp g(GSTD, X, C, STD);
- GX[b][i][d] += reduceK(g, b, i, d, K) / N;
-}
-
-template<typename DType, typename Acctype>
-struct AggOpV2 {
-  __device__ AggOpV2(DeviceTensor<DType, 3> a,
-                     DeviceTensor<DType, 3> x,
-                     DeviceTensor<DType, 2> c,
-                     DeviceTensor<DType, 2> std) : A(a), X(x), C(c), STD(std) {}
-  __device__ __forceinline__ Acctype operator()(int b, int i, int k, int d) {
-    return ScalarConvert<DType, Acctype>::to(A[b][i][k] * (X[b][i][d] - C[k][d]) /
-                                             STD[k][d]);
-  }
-  DeviceTensor<DType, 3> A;
-  DeviceTensor<DType, 3> X;
-  DeviceTensor<DType, 2> C;
-  DeviceTensor<DType, 2> STD;
-};
-
-template<typename DType, typename Acctype>
-__global__ void AggregateV2_Forward_kernel (
-    DeviceTensor<DType, 3> E,
-    DeviceTensor<DType, 3> A,
-    DeviceTensor<DType, 3> X,
-    DeviceTensor<DType, 2> C,
-    DeviceTensor<DType, 2> STD) {
-  /* declarations of the variables */
-  int b, k, d, N;
-  /* Get the index and channels */
-  b = blockIdx.z;
-  d = blockIdx.x;
-  k = blockIdx.y;
-  N = X.getSize(1);
-  /* main operation */
-  AggOpV2<DType, Acctype> g(A, X, C, STD);
-  E[b][k][d] = reduceN<Acctype>(g, b, k, d, N);
-}
-
-template<typename DType, typename Acctype>
-struct AggV2BackOp {
-  __device__ AggV2BackOp(DeviceTensor<DType, 3> g,
-                         DeviceTensor<DType, 3> x,
-                         DeviceTensor<DType, 2> c,
-                         DeviceTensor<DType, 2> std) : G(g), X(x), C(c), STD(std) {}
-  __device__ __forceinline__ Acctype operator()(int b, int i, int k, int d) {
-    return ScalarConvert<DType, Acctype>::to(G[b][k][d] * (X[b][i][d] - C[k][d]) /
-                                             STD[k][d]);
-  }
-  DeviceTensor<DType, 3> G;
-  DeviceTensor<DType, 3> X;
-  DeviceTensor<DType, 2> C;
-  DeviceTensor<DType, 2> STD;
-};
-
-template<typename DType, typename Acctype>
-__global__ void AggregateV2_Backward_kernel (
-    DeviceTensor<DType, 3> GA,
-    DeviceTensor<DType, 3> GE,
-    DeviceTensor<DType, 3> A,
-    DeviceTensor<DType, 3> X,
-    DeviceTensor<DType, 2> C,
-    DeviceTensor<DType, 2> STD) {
-  /* declarations of the variables */
-  int b, k, i, D;
-  /* Get the index and channels */
-  b = blockIdx.z;
-  i = blockIdx.y;
-  k = blockIdx.x;
-  D = GE.getSize(2);
-  /* main operation */
-  AggV2BackOp<DType, Acctype> g(GE, X, C, STD);
-  GA[b][i][k] = reduceD<Acctype>(g, b, i, k, D);
-}
-
-} // namespace
-
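All of the deleted device code above follows one pattern: a small functor evaluates a single (b, i, k, d) term, and a reduce helper (reduceD, reduceK, reduceN, reduceBN, presumably defined alongside the DeviceTensor utilities) sums that term over exactly one index per thread block. A minimal sketch of the idea, with a hypothetical name and without the block-wide reduction a real implementation needs:

template <typename T, typename Op>
__device__ T reduceD_sketch(Op op, int b, int i, int k, int D) {
  // each thread accumulates a strided share of the sum over d ...
  T sum = 0;
  for (int d = threadIdx.x; d < D; d += blockDim.x) {
    sum += op(b, i, k, d);
  }
  // ... which still has to be combined across the block (shared memory or
  // __shfl_down_sync) before a single thread writes the result out.
  return sum;
}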
-at::Tensor Encoding_Dist_Inference_Forward_CUDA(
-    const at::Tensor X_,
-    const at::Tensor C_,
-    const at::Tensor STD_) {
-  // const at::Tensor S_,
-  // X \in R^{B, N, D}, C \in R^{K, D}, S \in R^K
-  auto KD_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
-  // E(x), E(x^2)
-  int N = X_.size(0) * X_.size(1);
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  dim3 blocks(C_.size(0), X_.size(1), X_.size(0));
-  dim3 threads(getNumThreads(C_.size(1)));
-  // calculate the kernel distance
-  AT_DISPATCH_FLOATING_TYPES(X_.type(), "Encoding_Dist_Inference_Forward_CUDA", ([&] {
-    /* Device tensors */
-    DeviceTensor<scalar_t, 3> KD = devicetensor<scalar_t, 3>(KD_);
-    DeviceTensor<scalar_t, 3> X = devicetensor<scalar_t, 3>(X_);
-    DeviceTensor<scalar_t, 2> C = devicetensor<scalar_t, 2>(C_);
-    DeviceTensor<scalar_t, 2> STD = devicetensor<scalar_t, 2>(STD_);
-    /* kernel function */
-    Encoding_Dist_Forward_kernel<scalar_t, scalar_t>
-      <<<blocks, threads, 0, stream>>> (KD, X, C, STD);
-  }));
-  AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  return KD_;
-}
-
-std::vector<at::Tensor> Encoding_Dist_Inference_Backward_CUDA(
-    const at::Tensor GKD_,
-    const at::Tensor KD_,
-    const at::Tensor X_,
-    const at::Tensor C_,
-    const at::Tensor STD_) {
-  auto GX_ = at::zeros_like(X_);
-  auto GC_ = at::zeros_like(C_);
-  auto GSTD_ = at::zeros_like(STD_);
-  /* kernel function */
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  dim3 blocks1(X_.size(2), X_.size(1), X_.size(0));
-  dim3 threads1(getNumThreads(C_.size(0)));
-  dim3 blocks2(C_.size(1), C_.size(0));
-  dim3 threads2(getNumThreads(X_.size(1)));
-  int N = X_.size(0) * X_.size(1);
-  AT_DISPATCH_FLOATING_TYPES(X_.type(), "Encoding_Dist_Backward_CUDA", ([&] {
-    /* Device tensors */
-    DeviceTensor<scalar_t, 3> GKD = devicetensor<scalar_t, 3>(GKD_);
-    DeviceTensor<scalar_t, 2> GSTD = devicetensor<scalar_t, 2>(GSTD_);
-    DeviceTensor<scalar_t, 3> GX = devicetensor<scalar_t, 3>(GX_);
-    DeviceTensor<scalar_t, 2> GC = devicetensor<scalar_t, 2>(GC_);
-    DeviceTensor<scalar_t, 3> X = devicetensor<scalar_t, 3>(X_);
-    DeviceTensor<scalar_t, 2> C = devicetensor<scalar_t, 2>(C_);
-    DeviceTensor<scalar_t, 2> STD = devicetensor<scalar_t, 2>(STD_);
-    Encoding_GradX_kernel<scalar_t, scalar_t>
-      <<<blocks1, threads1, 0, stream>>> (GKD, GX, X, C, STD);
-    AT_ASSERT(cudaGetLastError() == cudaSuccess);
-    Encoding_GradCSTD_kernel<scalar_t, scalar_t>
-      <<<blocks2, threads2, 0, stream>>> (GKD, GC, GSTD, X, C, STD);
-    AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  }));
-  return {GX_, GC_, GSTD_};
-}
-
-std::vector<at::Tensor> Encoding_Dist_Forward_CUDA(
-    const at::Tensor X_,
-    const at::Tensor C_,
-    double eps) {
-  // const at::Tensor S_,
-  // X \in R^{B, N, D}, C \in R^{K, D}, S \in R^K
-  auto KD_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
-  // E(x), E(x^2)
-  int N = X_.size(0) * X_.size(1);
-  auto SVar_ = (X_.pow(2).sum(0).sum(0).view({1, X_.size(2)}) -
-               2 * C_ * X_.sum(0).sum(0).view({1, X_.size(2)})).expand_as(C_) +
-               C_.pow(2) * N;
-  auto STD_ = at::sqrt(SVar_ / N + eps);
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  dim3 blocks(C_.size(0), X_.size(1), X_.size(0));
-  dim3 threads(getNumThreads(C_.size(1)));
-  // calculate the kernel distance
-  AT_DISPATCH_FLOATING_TYPES(X_.type(), "Encoding_Dist_Forward_CUDA", ([&] {
-    /* Device tensors */
-    DeviceTensor<scalar_t, 3> KD = devicetensor<scalar_t, 3>(KD_);
-    DeviceTensor<scalar_t, 3> X = devicetensor<scalar_t, 3>(X_);
-    DeviceTensor<scalar_t, 2> C = devicetensor<scalar_t, 2>(C_);
-    DeviceTensor<scalar_t, 2> STD = devicetensor<scalar_t, 2>(STD_);
-    /* kernel function */
-    Encoding_Dist_Forward_kernel<scalar_t, scalar_t>
-      <<<blocks, threads, 0, stream>>> (KD, X, C, STD);
-  }));
-  AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  return {KD_, STD_, SVar_ / (N - 1)};
-}
-
-std::vector<at::Tensor> Encoding_Dist_Backward_CUDA(
-    const at::Tensor GKD_,
-    const at::Tensor GSTD_,
-    const at::Tensor KD_,
-    const at::Tensor X_,
-    const at::Tensor C_,
-    const at::Tensor STD_) {
-  auto GX_ = at::zeros_like(X_);
-  auto GC_ = at::zeros_like(C_);
-  auto GSTD2_ = GSTD_.clone();
-  /* kernel function */
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  dim3 blocks1(X_.size(2), X_.size(1), X_.size(0));
-  dim3 threads1(getNumThreads(C_.size(0)));
-  dim3 blocks2(C_.size(1), C_.size(0));
-  dim3 threads2(getNumThreads(X_.size(1)));
-  int N = X_.size(0) * X_.size(1);
-  AT_DISPATCH_FLOATING_TYPES(X_.type(), "Encoding_Dist_Backward_CUDA", ([&] {
-    /* Device tensors */
-    DeviceTensor<scalar_t, 3> GKD = devicetensor<scalar_t, 3>(GKD_);
-    DeviceTensor<scalar_t, 2> GSTD = devicetensor<scalar_t, 2>(GSTD2_);
-    DeviceTensor<scalar_t, 3> GX = devicetensor<scalar_t, 3>(GX_);
-    DeviceTensor<scalar_t, 2> GC = devicetensor<scalar_t, 2>(GC_);
-    DeviceTensor<scalar_t, 3> X = devicetensor<scalar_t, 3>(X_);
-    DeviceTensor<scalar_t, 2> C = devicetensor<scalar_t, 2>(C_);
-    DeviceTensor<scalar_t, 2> STD = devicetensor<scalar_t, 2>(STD_);
-    Encoding_GradX_kernel<scalar_t, scalar_t>
-      <<<blocks1, threads1, 0, stream>>> (GKD, GX, X, C, STD);
-    AT_ASSERT(cudaGetLastError() == cudaSuccess);
-    Encoding_GradCSTD_kernel<scalar_t, scalar_t>
-      <<<blocks2, threads2, 0, stream>>> (GKD, GC, GSTD, X, C, STD);
-    AT_ASSERT(cudaGetLastError() == cudaSuccess);
-    Encoding_GradSTDX_kernel<scalar_t, scalar_t>
-      <<<blocks1, threads1, 0, stream>>> (GSTD, GX, X, C, STD, N);
-    AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  }));
-  // d_sigma/d_c
-  GC_ = GC_ - GSTD2_ * (X_.mean(0).mean(0) - C_) / STD_;
-  return {GX_, GC_};
-}
-
-at::Tensor AggregateV2_Forward_CUDA(
-    const at::Tensor A_,
-    const at::Tensor X_,
-    const at::Tensor C_,
-    const at::Tensor STD_) {
-  /* Device tensors */
-  auto E_ = torch::zeros({A_.size(0), C_.size(0), C_.size(1)}, A_.options());
-  // auto IS_ = 1.0f / (S_ + eps).sqrt();
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  // B, K, D
-  dim3 blocks(C_.size(1), C_.size(0), X_.size(0));
-  dim3 threads(getNumThreads(X_.size(1)));
-
-  AT_DISPATCH_FLOATING_TYPES(A_.type(), "Aggregate_Forward_CUDA", ([&] {
-    DeviceTensor<scalar_t, 3> E = devicetensor<scalar_t, 3>(E_);
-    DeviceTensor<scalar_t, 3> A = devicetensor<scalar_t, 3>(A_);
-    DeviceTensor<scalar_t, 3> X = devicetensor<scalar_t, 3>(X_);
-    DeviceTensor<scalar_t, 2> C = devicetensor<scalar_t, 2>(C_);
-    DeviceTensor<scalar_t, 2> STD = devicetensor<scalar_t, 2>(STD_);
-    /* kernel function */
-    AggregateV2_Forward_kernel<scalar_t, scalar_t>
-      <<<blocks, threads, 0, stream>>>(E, A, X, C, STD);
-  }));
-  AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  return E_;
-}
-
-std::vector<at::Tensor> AggregateV2_Backward_CUDA(
-    const at::Tensor GE_,
-    const at::Tensor E_,
-    const at::Tensor A_,
-    const at::Tensor X_,
-    const at::Tensor C_,
-    const at::Tensor STD_) {
-  auto gradA_ = at::zeros_like(A_);
-  auto gradX_ = at::bmm(A_, (GE_ / STD_.unsqueeze(0)));
-  auto gradC_ = -(A_.sum(1).unsqueeze(2) * GE_ / STD_.unsqueeze(0)).sum(0);
-  auto gradSTD_ = -(GE_ * E_).sum(0) / STD_;
-  // auto gradS_ = -0.5 * (GE_ * E_).sum(2).sum(0) / S_;
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  // B, K, D
-  dim3 blocks(C_.size(0), X_.size(1), X_.size(0));
-  dim3 threads(getNumThreads(C_.size(1)));
-  AT_DISPATCH_FLOATING_TYPES(A_.type(), "Aggregate_Backward_CUDA", ([&] {
-    /* Device tensors */
-    DeviceTensor<scalar_t, 3> GA = devicetensor<scalar_t, 3>(gradA_);
-    DeviceTensor<scalar_t, 3> GE = devicetensor<scalar_t, 3>(GE_);
-    DeviceTensor<scalar_t, 3> A = devicetensor<scalar_t, 3>(A_);
-    DeviceTensor<scalar_t, 3> X = devicetensor<scalar_t, 3>(X_);
-    DeviceTensor<scalar_t, 2> C = devicetensor<scalar_t, 2>(C_);
-    DeviceTensor<scalar_t, 2> STD = devicetensor<scalar_t, 2>(STD_);
-    AggregateV2_Backward_kernel<scalar_t, scalar_t>
-      <<<blocks, threads, 0, stream>>> (GA, GE, A, X, C, STD);
-  }));
-  AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  return {gradA_, gradX_, gradC_, gradSTD_};
-}
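Taken together, the deleted kernels implement a scaled-residual encoding: the forward passes compute KD[b][i][k] = sum_d ((X[b][i][d] - C[k][d]) / STD[k][d])^2 and E[b][k][d] = sum_i A[b][i][k] * (X[b][i][d] - C[k][d]) / STD[k][d]. A reference-only ATen sketch of those two forwards (hypothetical helper names; shapes as in the comments above: X is B x N x D, C and STD are K x D, A is B x N x K):

#include <ATen/ATen.h>

// KD[b][i][k] = sum_d ((X - C) / STD)^2, the per-codeword squared distance
at::Tensor encoding_dist_forward_reference(const at::Tensor& X,
                                           const at::Tensor& C,
                                           const at::Tensor& STD) {
  auto R = (X.unsqueeze(2) - C.unsqueeze(0).unsqueeze(0)) /
           STD.unsqueeze(0).unsqueeze(0);   // (B, N, K, D)
  return R.pow(2).sum(-1);                  // (B, N, K)
}

// E[b][k][d] = sum_i A[b][i][k] * (X - C) / STD, the aggregated residuals
at::Tensor aggregatev2_forward_reference(const at::Tensor& A,
                                         const at::Tensor& X,
                                         const at::Tensor& C,
                                         const at::Tensor& STD) {
  auto R = (X.unsqueeze(2) - C.unsqueeze(0).unsqueeze(0)) /
           STD.unsqueeze(0).unsqueeze(0);   // (B, N, K, D)
  return (A.unsqueeze(-1) * R).sum(1);      // (B, K, D)
}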
diff --git a/encoding/lib/gpu/operator.cpp b/encoding/lib/gpu/operator.cpp
index 5d21a16e..e51352d4 100644
--- a/encoding/lib/gpu/operator.cpp
+++ b/encoding/lib/gpu/operator.cpp
@@ -16,14 +16,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)");
m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA,
"Inplace Expectation backward (CUDA)");
- m.def("encoding_dist_forward", &Encoding_Dist_Forward_CUDA, "EncDist forward (CUDA)");
- m.def("encoding_dist_backward", &Encoding_Dist_Backward_CUDA, "Assign backward (CUDA)");
- m.def("encoding_dist_inference_forward", &Encoding_Dist_Inference_Forward_CUDA,
- "EncDist Inference forward (CUDA)");
- m.def("encoding_dist_inference_backward", &Encoding_Dist_Inference_Backward_CUDA,
- "Assign Inference backward (CUDA)");
- m.def("aggregatev2_forward", &AggregateV2_Forward_CUDA, "AggregateV2 forward (CUDA)");
- m.def("aggregatev2_backward", &AggregateV2_Backward_CUDA, "AggregateV2 backward (CUDA)");
m.def("leaky_relu_forward", &LeakyRelu_Forward_CUDA, "Learky ReLU forward (CUDA)");
m.def("leaky_relu_backward", &LeakyRelu_Backward_CUDA, "Learky ReLU backward (CUDA)");
+ m.def("conv_rectify", &CONV_RECTIFY_CUDA, "Convolution Rectifier (CUDA)");
}
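With the encoding-distance and aggregate bindings removed, the module now exposes only the surviving ops plus the new rectifier. For context, a self-contained sketch of the binding pattern used here (not the project's actual operator.cpp, which registers all of the ops listed above):

#include <torch/extension.h>

// declaration matches the one added to operator.h below
void CONV_RECTIFY_CUDA(at::Tensor& output, const at::Tensor& input,
                       at::IntArrayRef kernel_size, at::IntArrayRef stride,
                       at::IntArrayRef padding, at::IntArrayRef dilation,
                       bool avg_mode);

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // visible from Python as <extension module>.conv_rectify(output, input, ...)
  m.def("conv_rectify", &CONV_RECTIFY_CUDA, "Convolution Rectifier (CUDA)");
}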
diff --git a/encoding/lib/gpu/operator.h b/encoding/lib/gpu/operator.h
index 64dbe1de..01ec9b48 100644
--- a/encoding/lib/gpu/operator.h
+++ b/encoding/lib/gpu/operator.h
@@ -1,4 +1,5 @@
 #include <torch/extension.h>
+#include <ATen/ATen.h>
 #include <vector>
at::Tensor ROIAlign_Forward_CUDA(
@@ -102,45 +103,15 @@ at::Tensor Expectation_Inp_Backward_CUDA(
const at::Tensor beta_,
float eps);
-at::Tensor Encoding_Dist_Inference_Forward_CUDA(
- const at::Tensor X_,
- const at::Tensor C_,
- const at::Tensor STD_);
-
-std::vector<at::Tensor> Encoding_Dist_Inference_Backward_CUDA(
- const at::Tensor GKD_,
- const at::Tensor KD_,
- const at::Tensor X_,
- const at::Tensor C_,
- const at::Tensor STD_);
-
-std::vector<at::Tensor> Encoding_Dist_Forward_CUDA(
- const at::Tensor X,
- const at::Tensor C,
- double eps);
-
-std::vector<at::Tensor> Encoding_Dist_Backward_CUDA(
- const at::Tensor GKD_,
- const at::Tensor GSTD_,
- const at::Tensor KD_,
- const at::Tensor X_,
- const at::Tensor C_,
- const at::Tensor STD_);
-
-at::Tensor AggregateV2_Forward_CUDA(
- const at::Tensor A_,
- const at::Tensor X_,
- const at::Tensor C_,
- const at::Tensor STD_);
-
-std::vector<at::Tensor> AggregateV2_Backward_CUDA(
- const at::Tensor GE_,
- const at::Tensor E_,
- const at::Tensor A_,
- const at::Tensor X_,
- const at::Tensor C_,
- const at::Tensor STD_);
-
void LeakyRelu_Forward_CUDA(at::Tensor z, float slope);
void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope);
+
+void CONV_RECTIFY_CUDA(
+ at::Tensor& output,
+ const at::Tensor& input,
+ at::IntArrayRef kernel_size,
+ at::IntArrayRef stride,
+ at::IntArrayRef padding,
+ at::IntArrayRef dilation,
+ bool avg_mode);
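CONV_RECTIFY_CUDA is the only new public entry point added to the GPU headers. A hypothetical caller sketch (the actual semantics live in rectify_cuda.cu below; deriving kernel_size from the weight's spatial dims and rectifying a standard conv2d output in place are assumptions, not the library's documented API):

#include <torch/extension.h>

// CONV_RECTIFY_CUDA is declared in operator.h (see above)
at::Tensor rectified_conv2d_sketch(const at::Tensor& input, const at::Tensor& weight,
                                   at::IntArrayRef stride, at::IntArrayRef padding,
                                   at::IntArrayRef dilation, bool avg_mode) {
  // ordinary convolution first ...
  at::Tensor output = at::conv2d(input, weight, /*bias=*/{}, stride, padding, dilation);
  // ... then ask the CUDA op to rectify the result in place, using the same geometry
  CONV_RECTIFY_CUDA(output, input, {weight.size(2), weight.size(3)},
                    stride, padding, dilation, avg_mode);
  return output;
}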
diff --git a/encoding/lib/gpu/rectify_cuda.cu b/encoding/lib/gpu/rectify_cuda.cu
new file mode 100644
index 00000000..6bf56d2f
--- /dev/null
+++ b/encoding/lib/gpu/rectify_cuda.cu
@@ -0,0 +1,222 @@
+#include