diff --git a/docs/make.jl b/docs/make.jl
index 7959b0428..0975c48d9 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -24,6 +24,7 @@ makedocs(; modules = [Metalhead, Artifacts, LazyArtifacts, Images, DataAugmentat
                      "api/resnet.md",
                      "api/densenet.md",
                      "api/efficientnet.md",
+                     "api/mobilenet.md",
                      "api/inception.md",
                      "api/hybrid.md",
                      "api/others.md",
diff --git a/docs/src/api/efficientnet.md b/docs/src/api/efficientnet.md
index ac88d9656..00b909b3e 100644
--- a/docs/src/api/efficientnet.md
+++ b/docs/src/api/efficientnet.md
@@ -1,10 +1,8 @@
-# Efficient Networks
+# EfficientNet family of models
+
+This is the API reference for the EfficientNet family of models supported by Metalhead.jl.
 
 ```@docs
 EfficientNet
 EfficientNetv2
-MobileNetv1
-MobileNetv2
-MobileNetv3
-MNASNet
-```
\ No newline at end of file
+```
diff --git a/docs/src/api/hybrid.md b/docs/src/api/hybrid.md
index adcb65822..6ce6f4124 100644
--- a/docs/src/api/hybrid.md
+++ b/docs/src/api/hybrid.md
@@ -14,4 +14,4 @@ ConvNeXt
 ```@docs
 Metalhead.convmixer
 Metalhead.convnext
-```
\ No newline at end of file
+```
diff --git a/docs/src/api/inception.md b/docs/src/api/inception.md
index e7470f4ec..59d3ab697 100644
--- a/docs/src/api/inception.md
+++ b/docs/src/api/inception.md
@@ -1,4 +1,4 @@
-# Inception models
+# Inception family of models
 
 This is the API reference for the Inception family of models supported by Metalhead.jl.
 
diff --git a/docs/src/api/layers.md b/docs/src/api/layers.md
index 3b23693bb..56aafda47 100644
--- a/docs/src/api/layers.md
+++ b/docs/src/api/layers.md
@@ -1,6 +1,6 @@
 # Layers
 
-Metalhead also defines a module called `Layers` which contains some more modern layers that are not available in Flux. To use the functions defined in the `Layers` module, you need to import it.
+Metalhead also defines a module called `Layers`, which contains custom layers that are used to build the models in Metalhead. These layers are not available in Flux at present. To use the functions defined in the `Layers` module, you need to import it.
 
 ```julia
 using Metalhead: Layers
@@ -10,9 +10,91 @@ This page contains the API reference for the `Layers` module.
 
 !!! warning
 
-    The `Layers` module is still a work in progress. While we will endeavour to keep the API stable, we cannot guarantee that it will not change in the future. If you find any of the functions in this
-    module do not work as expected, please open an issue on GitHub.
+    The `Layers` module is still a work in progress. While we will endeavour to keep the API stable, we cannot guarantee that it will not change in the future. If you find that any of the functions in this module do not work as expected, please open an issue on GitHub.
 
-```@autodocs
-Modules = [Metalhead.Layers]
+## Convolution + BatchNorm layers
+
+```@docs
+Metalhead.Layers.conv_norm
+Metalhead.Layers.basic_conv_bn
+```
+
+## Convolution-related custom blocks
+
+These blocks are designed to be used in convolutional neural networks. Most of them are used in the MobileNet and EfficientNet families of models, but they also feature in "fancier" versions of well-known models such as ResNet (SE-ResNet).
+
+```@docs
+Metalhead.Layers.dwsep_conv_norm
+Metalhead.Layers.mbconv
+Metalhead.Layers.fused_mbconv
+Metalhead.Layers.squeeze_excite
+Metalhead.Layers.effective_squeeze_excite
+```
+
+## Normalisation, Dropout and Pooling layers
+
+Metalhead provides various custom layers for normalisation, dropout and pooling, which are used to further customise several of the models.
+
+### Normalisation layers
+
+```@docs
+Metalhead.Layers.ChannelLayerNorm
+Metalhead.Layers.LayerNormV2
+Metalhead.Layers.LayerScale
+```
+
+### Dropout layers
+
+```@docs
+Metalhead.Layers.DropBlock
+Metalhead.Layers.dropblock
+Metalhead.Layers.StochasticDepth
+```
+
+### Pooling layers
+
+```@docs
+Metalhead.Layers.AdaptiveMeanMaxPool
+```
+
+## Classifier creation
+
+Metalhead provides a flexible function for creating the classifier "head" of a network, and it is used extensively throughout the library.
+
+```@docs
+Metalhead.Layers.create_classifier
+```
+
+## Vision transformer-related layers
+
+The `Layers` module contains specific layers that are used to build vision transformer (ViT)-inspired models:
+
+```@docs
+Metalhead.Layers.MultiHeadSelfAttention
+Metalhead.Layers.ClassTokens
+Metalhead.Layers.ViPosEmbedding
+Metalhead.Layers.PatchEmbedding
+```
+
+## MLPMixer-related blocks
+
+Apart from this, the `Layers` module also contains certain blocks used in MLPMixer-style models:
+
+```@docs
+Metalhead.Layers.gated_mlp_block
+Metalhead.Layers.mlp_block
+```
+
+## Utilities for layers
+
+These are miscellaneous utilities present in the `Layers` module. They are used with other custom or built-in layers to make certain common operations in neural networks easier.
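+
+For instance, `cat_channels` concatenates feature maps along the channel dimension, which is a common operation when writing Inception-style or DenseNet-style blocks. A small sketch of its assumed usage:
+
+```julia
+using Metalhead: Layers
+
+x = rand(Float32, 8, 8, 16, 1);
+y = rand(Float32, 8, 8, 32, 1);
+size(Layers.cat_channels(x, y))  # expected to be (8, 8, 48, 1)
+```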
+
+```@docs
+Metalhead.Layers.inputscale
+Metalhead.Layers.actadd
+Metalhead.Layers.addact
+Metalhead.Layers.cat_channels
+Metalhead.Layers.flatten_chains
+Metalhead.Layers.linear_scheduler
+Metalhead.Layers.swapdims
 ```
diff --git a/docs/src/api/mixers.md b/docs/src/api/mixers.md
index 42a19f28f..6b93e8ac4 100644
--- a/docs/src/api/mixers.md
+++ b/docs/src/api/mixers.md
@@ -23,4 +23,4 @@ Metalhead.mixerblock
 Metalhead.resmixerblock
 Metalhead.SpatialGatingUnit
 Metalhead.spatialgatingblock
-```
\ No newline at end of file
+```
diff --git a/docs/src/api/mobilenet.md b/docs/src/api/mobilenet.md
new file mode 100644
index 000000000..21646023c
--- /dev/null
+++ b/docs/src/api/mobilenet.md
@@ -0,0 +1,10 @@
+# MobileNet family of models
+
+This is the API reference for the MobileNet family of models supported by Metalhead.jl.
+
+```@docs
+MobileNetv1
+MobileNetv2
+MobileNetv3
+MNASNet
+```
diff --git a/docs/src/api/utilities.md b/docs/src/api/utilities.md
index e80a17365..d6e237d19 100644
--- a/docs/src/api/utilities.md
+++ b/docs/src/api/utilities.md
@@ -1,10 +1,8 @@
-# Utilities
+# Model utilities
 
 Metalhead provides some utility functions for making it easier to work with the models inside the library or to build new ones. The API reference for these is documented below.
 
-## `backbone` and `classifier`
-
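+For example, `backbone` and `classifier` can be used to split a model such as `ResNet(18)` into its convolutional feature extractor and its fully connected classifier "head" respectively:
+
+```julia
+using Metalhead
+
+model = ResNet(18)
+feature_extractor = backbone(model)
+head = classifier(model)
+```
+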
 ```@docs
 backbone
 classifier
-```
\ No newline at end of file
+```
diff --git a/docs/src/howto/resnet.md b/docs/src/howto/resnet.md
index 3adbe2bea..54cce7f52 100644
--- a/docs/src/howto/resnet.md
+++ b/docs/src/howto/resnet.md
@@ -19,3 +19,45 @@ model = ResNet(50; pretrain=true)
 ```
 
 To check out more about using pretrained models, check out the [pretrained models guide](@ref pretrained).
+
+## The mid-level function
+
+Metalhead also provides a function for users looking to customise the ResNet family of models further. This function is named [`Metalhead.resnet`](@ref) and has a detailed docstring that describes all the various customisation options.
+
+```@docs
+Metalhead.resnet
+```
+
+But how do we use this to build a ResNet-like model of our liking? First, let's take a peek at how we would write the vanilla ResNet-18 model using this function. We know from the docstring that we want to use `Metalhead.basicblock` for the block, since the paper uses bottleneck blocks only for depths of 50 and above. We also know from the paper the number of block repeats in each stage of the model: 2 for each stage. For all other options, the default values work well. So we can write the ResNet-18 model as follows:
+
+```julia
+resnet18 = Metalhead.resnet(Metalhead.basicblock, [2, 2, 2, 2])
+```
+
+What if we want to customise the number of output classes? That's easy; the model has several keyword arguments, one of which allows this. The docstring tells us that it is `nclasses`, and so we can write:
+
+```julia
+resnet18 = Metalhead.resnet(Metalhead.basicblock, [2, 2, 2, 2]; nclasses = 10)
+```
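+
+Since the returned model is an ordinary Flux model, we can quickly check that a forward pass works by feeding it a dummy batch in WHCN (width × height × channel × batch) order. This is just a sanity check; the output size shown assumes the `nclasses = 10` setting from the snippet above:
+
+```julia
+x = rand(Float32, 224, 224, 3, 1)  # a random 224×224 RGB "image" with a batch size of 1
+size(resnet18(x))  # should be (10, 1): ten class scores for the one image in the batch
+```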
+
+Let's try customising this further. Say I want to make a ResNet-50-like model, but with [`StochasticDepth`](https://arxiv.org/abs/1603.09382) to provide even more regularisation, and also a custom pooling layer such as `AdaptiveMeanMaxPool`. Both of these options are provided by Metalhead out of the box, and so we can write:
+
+```julia
+using Metalhead: Layers  # AdaptiveMeanMaxPool lives in the Layers module of Metalhead
+
+custom_resnet = Metalhead.resnet(Metalhead.bottleneck, [3, 4, 6, 3];
+                                 pool_layer = Layers.AdaptiveMeanMaxPool((1, 1)),
+                                 stochastic_depth_prob = 0.2)
+```
+
+To make this a ResNeXt-like model, all we need to do is configure the cardinality and the
+base width:
+
+```julia
+custom_resnet = Metalhead.resnet(Metalhead.bottleneck, [3, 4, 6, 3];
+                                 cardinality = 32, base_width = 4,
+                                 pool_layer = Layers.AdaptiveMeanMaxPool((1, 1)),
+                                 stochastic_depth_prob = 0.2)
+```
+
+And we have a custom model, built with minimal effort! The documentation for `Metalhead.resnet` has been written in as much detail as possible to make it easy to use. However, if you find anything difficult to understand, feel free to open an issue and we will be happy to help you out and to improve the documentation where necessary.
diff --git a/docs/src/tutorials/quickstart.md b/docs/src/tutorials/quickstart.md
index ef6a7b5cd..7b89d77cb 100644
--- a/docs/src/tutorials/quickstart.md
+++ b/docs/src/tutorials/quickstart.md
@@ -2,9 +2,17 @@
 
 Metalhead.jl is a library written in Flux.jl that is a collection of image models, layers and utilities for deep learning in computer vision.
 
-## Pre-trained models
+## Model architectures and pre-trained models
 
-In Metalhead.jl, camel-cased functions mimicking the naming style followed in the paper such as [`ResNet`](@ref) or [`ResNeXt`](@ref) are considered the "higher" level API for models. These are the functions that end-users who do not want to experiment much with model architectures should use. These models also support the option for loading pre-trained weights from ImageNet.
+In Metalhead.jl, camel-cased functions mimicking the naming style followed in the respective papers, such as [`ResNet`](@ref) or [`MobileNetv3`](@ref), are considered the "higher" level API for models. These are the functions that end-users who do not want to experiment much with model architectures should use. To use one of these models, simply call its function:
+
+```julia
+using Metalhead
+
+model = ResNet(18);
+```
+
+The API reference contains the documentation and options for each model function. These models also support the option for loading pre-trained weights from ImageNet.
 
 !!! note
 
@@ -18,12 +26,14 @@ using Metalhead
 model = ResNet(18; pretrain = true);
 ```
 
-Refer to the pretraining guide for more details on how to use pre-trained models.
+Refer to the [pretraining guide](@ref pretrained) for more details on how to use pre-trained models.
 
 ## More model configuration options
 
-For users who want to use more options for model configuration, Metalhead provides a "mid-level" API for models. The model functions that are in lowercase such as [`resnet`](@ref) or [`mobilenetv3`](@ref) are the "lower" level API for models. These are the functions that end-users who want to experiment with model architectures should use. These models do not support the option for loading pre-trained weights from ImageNet out of the box.
+For users who want more options for model configuration, Metalhead provides a "mid-level" API for models. These are the model functions written in lowercase, such as [`resnet`](@ref) or [`mobilenetv3`](@ref). End-users who want to experiment with model architectures should use these functions. These models do not support the option for loading pre-trained weights from ImageNet out of the box, although one can always load weights explicitly using the `loadmodel!` function from Flux.
+
+To use any of these models, check out the docstrings for the model functions (these are documented in the API reference). Note that these functions typically require more configuration options to be passed in, but offer a lot more flexibility in terms of model architecture. Metalhead defines sensible defaults for as many of these options as possible, so that users only need to set the specific options they want to customise.
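+
+For example, a ResNet-18-like model with ten output classes can be built with the mid-level API as follows (the ResNet how-to guide walks through these options in detail):
+
+```julia
+using Metalhead
+
+model = Metalhead.resnet(Metalhead.basicblock, [2, 2, 2, 2]; nclasses = 10);
+```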
 
-To use any of these models, check out the docstrings for the model functions. Note that these functions typically require more configuration options to be passed in, but offer a lot more flexibility in terms of model architecture.
+## Builders for the advanced user
 
-## 
\ No newline at end of file
+For users who want the ability to customise their models as much as possible, Metalhead offers a powerful low-level interface. These are known as [**builders**](@ref builders) and allow the user to hack into the core of models and build them up to their liking. Most users will not need builders, since a large number of configuration options are already exposed at the mid-level API. However, for package developers and users who want to build customised versions of their own models, the low-level API provides the customisability required while still keeping user code to a minimum.
diff --git a/src/convnets/resnets/core.jl b/src/convnets/resnets/core.jl
index d1ad0d16e..78603cf40 100644
--- a/src/convnets/resnets/core.jl
+++ b/src/convnets/resnets/core.jl
@@ -372,6 +372,7 @@ Wide ResNet, ResNeXt and Res2Net. For an _even_ more generic model API, see [`Me
   - `reduction_factor`: The reduction factor used in the model.
   - `connection`: This is a function that determines the residual connection in the model. For
     `resnets`, either of [`Metalhead.addact`](@ref) or [`Metalhead.actadd`](@ref) is recommended.
+    These decide whether the residual connection is added before or after the activation function.
   - `norm_layer`: The normalisation layer to be used in the model.
   - `revnorm`: set to `true` to place the normalisation layers before the convolutions
   - `attn_fn`: A callback that is used to determine the attention function to be used in the model.
diff --git a/src/convnets/resnets/resnet.jl b/src/convnets/resnets/resnet.jl
index 8f15d0471..a21a5fd0c 100644
--- a/src/convnets/resnets/resnet.jl
+++ b/src/convnets/resnets/resnet.jl
@@ -25,9 +25,9 @@ function ResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3,
     model = ResNet(layers)
     if pretrain
         artifact_name = "resnet$(depth)"
-        if depth ∈ [18, 34]
+        if depth in [18, 34]
             artifact_name *= "-IMAGENET1K_V1"
-        elseif depth ∈ [50, 101, 152]
+        elseif depth in [50, 101, 152]
             artifact_name *= "-IMAGENET1K_V2"
         end
         loadpretrain!(model, artifact_name)
@@ -69,7 +69,7 @@ function WideResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer
     model = WideResNet(layers)
     if pretrain
         artifact_name = "wideresnet$(depth)"
-        if depth ∈ [50, 101]
+        if depth in [50, 101]
             artifact_name *= "-IMAGENET1K_V2"
         end
         loadpretrain!(model, artifact_name)
diff --git a/src/layers/drop.jl b/src/layers/drop.jl
index 752668c92..15f8e7533 100644
--- a/src/layers/drop.jl
+++ b/src/layers/drop.jl
@@ -97,7 +97,7 @@ ChainRulesCore.@non_differentiable _dropblock_checks(x, drop_block_prob, gamma_s
 
 function (m::DropBlock)(x)
     _dropblock_checks(x, m.drop_block_prob, m.gamma_scale)
-    return Flux._isactive(m, x) ?
+    return Flux._isactive(m) ?
            dropblock(m.rng, x, m.drop_block_prob, m.block_size, m.gamma_scale) : x
 end
diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 25b3af374..f1589b853 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -25,7 +25,6 @@ end
 
 (m::ChannelLayerNorm)(x) = m.diag(Flux.normalise(x; dims = ndims(x) - 1, ϵ = m.ϵ))
 
-
 """
     LayerNormV2(size..., λ=identity; affine=true, eps=1f-5)
 
@@ -33,16 +32,16 @@ Same as Flux's LayerNorm but eps is added before taking the square root in the d
 Therefore, LayerNormV2 matches pytorch's LayerNorm.
 """
 struct LayerNormV2{F,D,T,N}
-  λ::F
-  diag::D
-  ϵ::T
-  size::NTuple{N,Int}
-  affine::Bool
+    λ::F
+    diag::D
+    ϵ::T
+    size::NTuple{N,Int}
+    affine::Bool
 end
 
 function LayerNormV2(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5)
-  diag = affine ? Flux.Scale(size..., λ) : λ!=identity ? Base.Fix1(broadcast, λ) : identity
-  return LayerNormV2(λ, diag, eps, size, affine)
+    diag = affine ? Flux.Scale(size..., λ) : λ!=identity ? Base.Fix1(broadcast, λ) : identity
+    return LayerNormV2(λ, diag, eps, size, affine)
 end
 LayerNormV2(size::Integer...; kw...) = LayerNormV2(Int.(size); kw...)
 LayerNormV2(size_act...; kw...) = LayerNormV2(Int.(size_act[1:end-1]), size_act[end]; kw...)
@@ -50,19 +49,19 @@ LayerNormV2(size_act...; kw...) = LayerNormV2(Int.(size_act[1:end-1]), size_act[
 @functor LayerNormV2
 
 function (a::LayerNormV2)(x::AbstractArray)
-  eps = convert(float(eltype(x)), a.ϵ)  # avoids promotion for Float16 data, but should ε chage too?
-  a.diag(_normalise(x; dims=1:length(a.size), eps))
+    eps = convert(float(eltype(x)), a.ϵ)  # avoids promotion for Float16 data, but should ε change too?
+    a.diag(_normalise(x; dims=1:length(a.size), eps))
 end
 
 function Base.show(io::IO, l::LayerNormV2)
-  print(io, "LayerNormV2(", join(l.size, ", "))
-  l.λ === identity || print(io, ", ", l.λ)
-  Flux.hasaffine(l) || print(io, ", affine=false")
-  print(io, ")")
+    print(io, "LayerNormV2(", join(l.size, ", "))
+    l.λ === identity || print(io, ", ", l.λ)
+    Flux.hasaffine(l) || print(io, ", affine=false")
+    print(io, ")")
 end
 
 @inline function _normalise(x::AbstractArray; dims=ndims(x), eps=Flux.ofeltype(x, 1e-5))
     μ = mean(x, dims=dims)
     σ² = var(x, dims=dims, mean=μ, corrected=false)
     return @. (x - μ) / sqrt(σ² + eps)
-  end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 82fd9f42c..3353ab26b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -59,7 +59,7 @@ const TEST_LBLS = readlines(download("https://raw.githubusercontent.com/pytorch/
 function acctest(model)
     ypred = model(TEST_X) |> vec
     top5 = TEST_LBLS[sortperm(ypred; rev = true)]
-    return "acoustic guitar" ∈ top5
+    return "acoustic guitar" in top5
 end
 
 x_224 = rand(Float32, 224, 224, 3, 1)