Skip to content

Commit

Permalink
Porting docs, examples, tutorials and galleries (#5620)
Browse files Browse the repository at this point in the history
* Fix examples, tutorials and gallery

* Update gallery/plot_optical_flow.py

Co-authored-by: Nicolas Hug <[email protected]>

* Fix import

* Revert hardcoded normalization

* fix uncommitted changes

* Fix bug

* Fix more bugs

* Making resize optional for segmentation

* Fixing preset

* Fix mypy

* Fixing documentation strings

* Fix flake8

* minor refactoring

Co-authored-by: Nicolas Hug <[email protected]>
  • Loading branch information
datumbox and NicolasHug authored Mar 15, 2022
1 parent 5a96c9a commit 6d96ed5
Show file tree
Hide file tree
Showing 20 changed files with 115 additions and 81 deletions.
13 changes: 10 additions & 3 deletions android/test_app/make_assets.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
import torch
import torchvision
from torch.utils.mobile_optimizer import optimize_for_mobile
from torchvision.models.detection import (
fasterrcnn_mobilenet_v3_large_320_fpn,
FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,
)

print(torch.__version__)

model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(
pretrained=True, box_score_thresh=0.7, rpn_post_nms_top_n_test=100, rpn_score_thresh=0.4, rpn_pre_nms_top_n_test=150
model = fasterrcnn_mobilenet_v3_large_320_fpn(
weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
box_score_thresh=0.7,
rpn_post_nms_top_n_test=100,
rpn_score_thresh=0.4,
rpn_pre_nms_top_n_test=150,
)

model.eval()
Expand Down
2 changes: 1 addition & 1 deletion examples/cpp/hello_world/trace_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
HERE = osp.dirname(osp.abspath(__file__))
ASSETS = osp.dirname(osp.dirname(HERE))

model = torchvision.models.resnet18(pretrained=False)
model = torchvision.models.resnet18()
model.eval()

traced_model = torch.jit.script(model)
Expand Down
34 changes: 15 additions & 19 deletions gallery/plot_optical_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import torch
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
import torchvision.transforms as T


plt.rcParams["savefig.bbox"] = "tight"
Expand Down Expand Up @@ -88,24 +87,19 @@ def plot(imgs, **imshow_kwargs):
# reduce the image sizes for the example to run faster. Image dimension must be
# divisible by 8.

from torchvision.models.optical_flow import Raft_Large_Weights

def preprocess(batch):
transforms = T.Compose(
[
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=0.5, std=0.5), # map [0, 1] into [-1, 1]
T.Resize(size=(520, 960)),
]
)
batch = transforms(batch)
return batch
weights = Raft_Large_Weights.DEFAULT
transforms = weights.transforms()


# If you can, run this example on a GPU, it will be a lot faster.
device = "cuda" if torch.cuda.is_available() else "cpu"
def preprocess(img1_batch, img2_batch):
    """Resize both frame batches to (520, 960) — RAFT requires image
    dimensions divisible by 8 — then apply the weights' preprocessing
    transforms. Returns the two transformed batches.
    """
    resized = [F.resize(batch, size=[520, 960]) for batch in (img1_batch, img2_batch)]
    return transforms(*resized)[:2]


img1_batch = preprocess(img1_batch).to(device)
img2_batch = preprocess(img2_batch).to(device)
img1_batch, img2_batch = preprocess(img1_batch, img2_batch)

print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")

Expand All @@ -121,7 +115,10 @@ def preprocess(batch):

from torchvision.models.optical_flow import raft_large

model = raft_large(pretrained=True, progress=False).to(device)
# If you can, run this example on a GPU, it will be a lot faster.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
model = model.eval()

list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
Expand Down Expand Up @@ -182,10 +179,9 @@ def preprocess(batch):
# from torchvision.io import write_jpeg
# for i, (img1, img2) in enumerate(zip(frames, frames[1:])):
# # Note: it would be faster to predict batches of flows instead of individual flows
# img1 = preprocess(img1[None]).to(device)
# img2 = preprocess(img2[None]).to(device)
# img1, img2 = preprocess(img1, img2)

# list_of_flows = model(img1_batch, img2_batch)
# list_of_flows = model(img1.to(device), img2.to(device))
# predicted_flow = list_of_flows[-1][0]
# flow_img = flow_to_image(predicted_flow).to("cpu")
# output_folder = "/tmp/" # Update this to the folder of your choice
Expand Down
8 changes: 5 additions & 3 deletions gallery/plot_repurposing_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,14 @@ def show(imgs):
# Here is demo with a Faster R-CNN model loaded from
# :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`

from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

model = fasterrcnn_resnet50_fpn(pretrained=True, progress=False)
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn(weights=weights, progress=False)
print(img.size())

img = F.convert_image_dtype(img, torch.float)
transforms = weights.transforms()
img, _ = transforms(img)
target = {}
target["boxes"] = boxes
target["labels"] = labels = torch.ones((masks.size(0),), dtype=torch.int64)
Expand Down
12 changes: 4 additions & 8 deletions gallery/plot_scripted_tensor_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,20 +85,16 @@ def show(imgs):
# Let's define a ``Predictor`` module that transforms the input tensor and then
# applies an ImageNet model on it.

from torchvision.models import resnet18
from torchvision.models import resnet18, ResNet18_Weights


class Predictor(nn.Module):

def __init__(self):
super().__init__()
self.resnet18 = resnet18(pretrained=True, progress=False).eval()
self.transforms = nn.Sequential(
T.Resize([256, ]), # We use single int value inside a list due to torchscript type restrictions
T.CenterCrop(224),
T.ConvertImageDtype(torch.float),
T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
)
weights = ResNet18_Weights.DEFAULT
self.resnet18 = resnet18(weights=weights, progress=False).eval()
self.transforms = weights.transforms()

def forward(self, x: torch.Tensor) -> torch.Tensor:
with torch.no_grad():
Expand Down
39 changes: 27 additions & 12 deletions gallery/plot_visualization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,17 @@ def show(imgs):
# :func:`~torchvision.models.detection.ssd300_vgg16`. For more details
# on the output of such models, you may refer to :ref:`instance_seg_output`.

from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms.functional import convert_image_dtype
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights


batch_int = torch.stack([dog1_int, dog2_int])
batch = convert_image_dtype(batch_int, dtype=torch.float)

model = fasterrcnn_resnet50_fpn(pretrained=True, progress=False)
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

batch, _ = transforms(batch_int)

model = fasterrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()

outputs = model(batch)
Expand Down Expand Up @@ -120,13 +123,15 @@ def show(imgs):
# images must be normalized before they're passed to a semantic segmentation
# model.

from torchvision.models.segmentation import fcn_resnet50
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights

weights = FCN_ResNet50_Weights.DEFAULT
transforms = weights.transforms(resize_size=None)

model = fcn_resnet50(pretrained=True, progress=False)
model = fcn_resnet50(weights=weights, progress=False)
model = model.eval()

normalized_batch = F.normalize(batch, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
normalized_batch, _ = transforms(batch)
output = model(normalized_batch)['out']
print(output.shape, output.min().item(), output.max().item())

Expand Down Expand Up @@ -262,8 +267,14 @@ def show(imgs):
# of them may not have masks, like
# :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`.

from torchvision.models.detection import maskrcnn_resnet50_fpn
model = maskrcnn_resnet50_fpn(pretrained=True, progress=False)
from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights

weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

batch, _ = transforms(batch_int)

model = maskrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()

output = model(batch)
Expand Down Expand Up @@ -378,13 +389,17 @@ def show(imgs):
# Note that the keypoint detection model does not need normalized images.
#

from torchvision.models.detection import keypointrcnn_resnet50_fpn
from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
from torchvision.io import read_image

person_int = read_image(str(Path("assets") / "person1.jpg"))
person_float = convert_image_dtype(person_int, dtype=torch.float)

model = keypointrcnn_resnet50_fpn(pretrained=True, progress=False)
weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

person_float, _ = transforms(person_int)

model = keypointrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()

outputs = model([person_float])
Expand Down
13 changes: 10 additions & 3 deletions ios/VisionTestApp/make_assets.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
import torch
import torchvision
from torch.utils.mobile_optimizer import optimize_for_mobile
from torchvision.models.detection import (
fasterrcnn_mobilenet_v3_large_320_fpn,
FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,
)

print(torch.__version__)

model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(
pretrained=True, box_score_thresh=0.7, rpn_post_nms_top_n_test=100, rpn_score_thresh=0.4, rpn_pre_nms_top_n_test=150
model = fasterrcnn_mobilenet_v3_large_320_fpn(
weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
box_score_thresh=0.7,
rpn_post_nms_top_n_test=100,
rpn_score_thresh=0.4,
rpn_pre_nms_top_n_test=150,
)

model.eval()
Expand Down
2 changes: 1 addition & 1 deletion test/tracing/frcnn/trace_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
HERE = osp.dirname(osp.abspath(__file__))
ASSETS = osp.dirname(osp.dirname(HERE))

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn()
model.eval()

traced_model = torch.jit.script(model)
Expand Down
2 changes: 1 addition & 1 deletion torchvision/models/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class IntermediateLayerGetter(nn.ModuleDict):
Examples::
>>> m = torchvision.models.resnet18(pretrained=True)
>>> m = torchvision.models.resnet18(weights=ResNet18_Weights.DEFAULT)
>>> # extract layer1 and layer3, giving as names `feat1` and feat2`
>>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
>>> {'layer1': 'feat1', 'layer3': 'feat2'})
Expand Down
27 changes: 18 additions & 9 deletions torchvision/models/detection/backbone_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from torchvision.ops.feature_pyramid_network import ExtraFPNBlock, FeaturePyramidNetwork, LastLevelMaxPool

from .. import mobilenet, resnet
from .._utils import IntermediateLayerGetter
from .._api import WeightsEnum
from .._utils import IntermediateLayerGetter, handle_legacy_interface


class BackboneWithFPN(nn.Module):
Expand Down Expand Up @@ -55,9 +56,13 @@ def forward(self, x: Tensor) -> Dict[str, Tensor]:
return x


@handle_legacy_interface(
weights=("pretrained", True), # type: ignore[arg-type]
)
def resnet_fpn_backbone(
*,
backbone_name: str,
pretrained: bool,
weights: Optional[WeightsEnum],
norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
trainable_layers: int = 3,
returned_layers: Optional[List[int]] = None,
Expand All @@ -69,7 +74,7 @@ def resnet_fpn_backbone(
Examples::
>>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
>>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3)
>>> backbone = resnet_fpn_backbone('resnet50', weights=ResNet50_Weights.DEFAULT, trainable_layers=3)
>>> # get some dummy image
>>> x = torch.rand(1,3,64,64)
>>> # compute the output
Expand All @@ -85,7 +90,7 @@ def resnet_fpn_backbone(
Args:
backbone_name (string): resnet architecture. Possible values are 'resnet18', 'resnet34', 'resnet50',
'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet
weights (WeightsEnum, optional): The pretrained weights for the model
norm_layer (callable): it is recommended to use the default value. For details visit:
(https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
trainable_layers (int): number of trainable (not frozen) layers starting from final block.
Expand All @@ -98,7 +103,7 @@ def resnet_fpn_backbone(
a new list of feature maps and their corresponding names. By
default a ``LastLevelMaxPool`` is used.
"""
backbone = resnet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
backbone = resnet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
return _resnet_fpn_extractor(backbone, trainable_layers, returned_layers, extra_blocks)


Expand Down Expand Up @@ -135,13 +140,13 @@ def _resnet_fpn_extractor(


def _validate_trainable_layers(
pretrained: bool,
is_trained: bool,
trainable_backbone_layers: Optional[int],
max_value: int,
default_value: int,
) -> int:
# don't freeze any layers if pretrained model or backbone is not used
if not pretrained:
if not is_trained:
if trainable_backbone_layers is not None:
warnings.warn(
"Changing trainable_backbone_layers has not effect if "
Expand All @@ -160,16 +165,20 @@ def _validate_trainable_layers(
return trainable_backbone_layers


@handle_legacy_interface(
weights=("pretrained", True), # type: ignore[arg-type]
)
def mobilenet_backbone(
*,
backbone_name: str,
pretrained: bool,
weights: Optional[WeightsEnum],
fpn: bool,
norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
trainable_layers: int = 2,
returned_layers: Optional[List[int]] = None,
extra_blocks: Optional[ExtraFPNBlock] = None,
) -> nn.Module:
backbone = mobilenet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
backbone = mobilenet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
return _mobilenet_extractor(backbone, fpn, trainable_layers, returned_layers, extra_blocks)


Expand Down
8 changes: 4 additions & 4 deletions torchvision/models/detection/faster_rcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class FasterRCNN(GeneralizedRCNN):
>>> from torchvision.models.detection.rpn import AnchorGenerator
>>> # load a pre-trained model for classification and return
>>> # only the features
>>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # FasterRCNN needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # so we need to add it here
Expand Down Expand Up @@ -415,7 +415,7 @@ def fasterrcnn_resnet50_fpn(
Example::
>>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
>>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
>>> # For training
>>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
>>> boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
Expand Down Expand Up @@ -532,7 +532,7 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(
Example::
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Expand Down Expand Up @@ -589,7 +589,7 @@ def fasterrcnn_mobilenet_v3_large_fpn(
Example::
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Expand Down
Loading

0 comments on commit 6d96ed5

Please sign in to comment.