From cd486df2971d0abe43835f6e2ae8ecc5e4cc6b23 Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Mon, 1 Aug 2022 07:48:20 +0530 Subject: [PATCH 1/8] Refine `invertedresidual` --- src/convnets/efficientnet.jl | 5 +++-- src/convnets/mobilenet/mobilenetv2.jl | 2 +- src/convnets/mobilenet/mobilenetv3.jl | 2 +- src/layers/conv.jl | 19 ++++++++++++------- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/convnets/efficientnet.jl b/src/convnets/efficientnet.jl index 4321e9443..71e6f8f0a 100644 --- a/src/convnets/efficientnet.jl +++ b/src/convnets/efficientnet.jl @@ -36,11 +36,12 @@ function efficientnet(scalings, block_configs; out_channels = _round_channels(scalew(o), 8) repeats = scaled(n) push!(blocks, - invertedresidual(k, in_channels, in_channels * e, out_channels, swish; + invertedresidual((k, k), in_channels, in_channels * e, out_channels, swish; stride = s, reduction = 4)) for _ in 1:(repeats - 1) push!(blocks, - invertedresidual(k, out_channels, out_channels * e, out_channels, swish; + invertedresidual((k, k), out_channels, out_channels * e, out_channels, + swish; stride = 1, reduction = 4)) end end diff --git a/src/convnets/mobilenet/mobilenetv2.jl b/src/convnets/mobilenet/mobilenetv2.jl index a97e7dda1..b97fc16ff 100644 --- a/src/convnets/mobilenet/mobilenetv2.jl +++ b/src/convnets/mobilenet/mobilenetv2.jl @@ -30,7 +30,7 @@ function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, ncla outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8) for i in 1:n push!(layers, - invertedresidual(3, inplanes, inplanes * t, outplanes, a; + invertedresidual((3, 3), inplanes, inplanes * t, outplanes, a; stride = i == 1 ? s : 1)) inplanes = outplanes end diff --git a/src/convnets/mobilenet/mobilenetv3.jl b/src/convnets/mobilenet/mobilenetv3.jl index d8666c5f3..d6873ac57 100644 --- a/src/convnets/mobilenet/mobilenetv3.jl +++ b/src/convnets/mobilenet/mobilenetv3.jl @@ -36,7 +36,7 @@ function mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, ncla outplanes = _round_channels(c * width_mult, 8) explanes = _round_channels(inplanes * t, 8) push!(layers, - invertedresidual(k, inplanes, explanes, outplanes, a; + invertedresidual((k, k), inplanes, explanes, outplanes, a; stride = s, reduction = r)) inplanes = outplanes end diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 5610d3be2..557db23a7 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -114,16 +114,17 @@ Create a basic inverted residual block for MobileNet variants - `reduction`: The reduction factor for the number of hidden feature maps in a squeeze and excite layer (see [`squeeze_excite`](#)). """ -function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, - activation = relu; stride, reduction = nothing) +function invertedresidual(kernel_size, inplanes::Integer, hidden_planes::Integer, + outplanes::Integer, activation = relu; stride::Integer, + reduction::Union{Nothing, Integer} = nothing) @assert stride in [1, 2] "`stride` has to be 1 or 2" pad = @. (kernel_size - 1) ÷ 2 - conv1 = (inplanes == hidden_planes) ? identity : - Chain(conv_norm((1, 1), inplanes, hidden_planes, activation; bias = false)) + conv1 = (inplanes == hidden_planes) ? (identity,) : + conv_norm((1, 1), inplanes, hidden_planes, activation; bias = false) selayer = isnothing(reduction) ? 
identity : squeeze_excite(hidden_planes; reduction, activation, gate_activation = hardσ, norm_layer = BatchNorm) - invres = Chain(conv1, + invres = Chain(conv1..., conv_norm(kernel_size, hidden_planes, hidden_planes, activation; bias = false, stride, pad = pad, groups = hidden_planes)..., selayer, @@ -131,6 +132,10 @@ function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, return (stride == 1 && inplanes == outplanes) ? SkipConnection(invres, +) : invres end -function invertedresidual(kernel_size::Integer, args...; kwargs...) - return invertedresidual((kernel_size, kernel_size), args...; kwargs...) +function invertedresidual(kernel_size, inplanes::Integer, outplanes::Integer, + activation = relu; stride::Integer, expansion, + reduction::Union{Nothing, Integer} = nothing) + hidden_planes = Int(inplanes * expansion) + return invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activation; + stride, reduction) end From e9306c3d48775e683f323ca7561b8646310e7cc0 Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Mon, 1 Aug 2022 11:10:48 +0530 Subject: [PATCH 2/8] Expose `inchannels` and `nclasses` for every model Also a. more type annotations b. Expose only configurations vital to the model API in terms of pretraining at the highest level --- .github/workflows/CI.yml | 3 +- src/convnets/alexnet.jl | 12 ++--- src/convnets/convmixer.jl | 20 +++---- src/convnets/convnext.jl | 42 ++++++++------- src/convnets/densenet.jl | 49 +++++++++-------- src/convnets/efficientnet.jl | 36 ++++++------- src/convnets/inception/googlenet.jl | 8 +-- src/convnets/inception/inceptionresnetv2.jl | 12 +++-- src/convnets/inception/inceptionv3.jl | 14 ++--- src/convnets/inception/inceptionv4.jl | 14 ++--- src/convnets/inception/xception.jl | 33 ++++++------ src/convnets/mobilenet/mobilenetv1.jl | 19 +++---- src/convnets/mobilenet/mobilenetv2.jl | 29 +++++----- src/convnets/mobilenet/mobilenetv3.jl | 29 +++++----- src/convnets/resnets/core.jl | 2 +- src/convnets/resnets/resnet.jl | 10 ++-- src/convnets/resnets/resnext.jl | 8 +-- src/convnets/resnets/seresnet.jl | 13 ++--- src/convnets/squeezenet.jl | 34 ++++++++---- src/convnets/vgg.jl | 60 +++++++++++---------- src/layers/conv.jl | 18 ++++--- src/layers/drop.jl | 9 ++-- src/layers/embeddings.jl | 22 ++++---- src/layers/mlp.jl | 10 ++-- src/layers/pool.jl | 5 +- src/layers/scale.jl | 2 +- src/layers/selayers.jl | 8 +-- src/mixers/core.jl | 11 ++-- src/mixers/gmlp.jl | 37 +++++++------ src/mixers/mlpmixer.jl | 34 ++++++------ src/mixers/resmlp.jl | 36 ++++++------- src/vit-based/vit.jl | 24 +++++---- test/convnets.jl | 11 ++-- test/mixers.jl | 36 +++---------- 34 files changed, 363 insertions(+), 347 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index d43e61da4..316b7a422 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -34,8 +34,7 @@ jobs: - '"Inception"' - '"DenseNet"' - '["ConvNeXt", "ConvMixer"]' - - 'r"ViTs"' - - 'r"Mixers"' + - '[r"ViTs", r"Mixers"]' steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/src/convnets/alexnet.jl b/src/convnets/alexnet.jl index 8ff65ffef..75ba5ad48 100644 --- a/src/convnets/alexnet.jl +++ b/src/convnets/alexnet.jl @@ -1,5 +1,5 @@ """ - alexnet(; nclasses = 1000) + alexnet(; nclasses::Integer = 1000) Create an AlexNet model ([reference](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)). 
@@ -8,8 +8,8 @@ Create an AlexNet model - `nclasses`: the number of output classes """ -function alexnet(; nclasses = 1000) - layers = Chain(Chain(Conv((11, 11), 3 => 64, relu; stride = (4, 4), pad = (2, 2)), +function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000) + layers = Chain(Chain(Conv((11, 11), inchannels => 64, relu; stride = (4, 4), pad = (2, 2)), MaxPool((3, 3); stride = (2, 2)), Conv((5, 5), 64 => 192, relu; pad = (2, 2)), MaxPool((3, 3); stride = (2, 2)), @@ -28,7 +28,7 @@ function alexnet(; nclasses = 1000) end """ - AlexNet(; pretrain = false, nclasses = 1000) + AlexNet(; pretrain::Bool = false, nclasses::Integer = 1000) Create a `AlexNet`. See also [`alexnet`](#). @@ -47,8 +47,8 @@ struct AlexNet end @functor AlexNet -function AlexNet(; pretrain = false, nclasses = 1000) - layers = alexnet(; nclasses = nclasses) +function AlexNet(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) + layers = alexnet(; inchannels, nclasses) if pretrain loadpretrain!(layers, "AlexNet") end diff --git a/src/convnets/convmixer.jl b/src/convnets/convmixer.jl index aa3d144d2..c75303184 100644 --- a/src/convnets/convmixer.jl +++ b/src/convnets/convmixer.jl @@ -1,6 +1,7 @@ """ - convmixer(planes, depth; inchannels = 3, kernel_size = (9, 9), patch_size::Dims{2} = 7, - activation = gelu, nclasses = 1000) + convmixer(planes::Integer, depth::Integer; kernel_size = (9, 9), + patch_size::Dims{2} = (7, 7), activation = gelu, + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvMixer model. ([reference](https://arxiv.org/abs/2201.09792)) @@ -9,14 +10,15 @@ Creates a ConvMixer model. - `planes`: number of planes in the output of each block - `depth`: number of layers - - `inchannels`: The number of channels in the input. - `kernel_size`: kernel size of the convolutional layers - `patch_size`: size of the patches - `activation`: activation function used after the convolutional layers + - `inchannels`: The number of channels in the input. - `nclasses`: number of classes in the output """ -function convmixer(planes, depth; inchannels = 3, kernel_size = (9, 9), - patch_size::Dims{2} = (7, 7), activation = gelu, nclasses = 1000) +function convmixer(planes::Integer, depth::Integer; kernel_size = (9, 9), + patch_size::Dims{2} = (7, 7), activation = gelu, + inchannels::Integer = 3, nclasses::Integer = 1000) stem = conv_norm(patch_size, inchannels, planes, activation; preact = true, stride = patch_size[1]) blocks = [Chain(SkipConnection(Chain(conv_norm(kernel_size, planes, planes, activation; @@ -39,7 +41,7 @@ const CONVMIXER_CONFIGS = Dict(:base => Dict(:planes => 1536, :depth => 20, :patch_size => (7, 7))) """ - ConvMixer(mode::Symbol = :base; inchannels = 3, activation = gelu, nclasses = 1000) + ConvMixer(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvMixer model. ([reference](https://arxiv.org/abs/2201.09792)) @@ -48,7 +50,6 @@ Creates a ConvMixer model. - `mode`: the mode of the model, either `:base`, `:small` or `:large` - `inchannels`: The number of channels in the input. 
- - `activation`: activation function used after the convolutional layers - `nclasses`: number of classes in the output """ struct ConvMixer @@ -56,14 +57,13 @@ struct ConvMixer end @functor ConvMixer -function ConvMixer(mode::Symbol = :base; inchannels = 3, activation = gelu, nclasses = 1000) +function ConvMixer(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, keys(CONVMIXER_CONFIGS)) planes = CONVMIXER_CONFIGS[mode][:planes] depth = CONVMIXER_CONFIGS[mode][:depth] kernel_size = CONVMIXER_CONFIGS[mode][:kernel_size] patch_size = CONVMIXER_CONFIGS[mode][:patch_size] - layers = convmixer(planes, depth; inchannels, kernel_size, patch_size, activation, - nclasses) + layers = convmixer(planes, depth; inchannels, kernel_size, patch_size, nclasses) return ConvMixer(layers) end diff --git a/src/convnets/convnext.jl b/src/convnets/convnext.jl index e6ccee16a..d7c39cc04 100644 --- a/src/convnets/convnext.jl +++ b/src/convnets/convnext.jl @@ -1,5 +1,5 @@ """ - convnextblock(planes, drop_path_rate = 0., λ = 1f-6) + convnextblock(planes::Integer, drop_path_rate = 0.0, layerscale_init = 1.0f-6) Creates a single block of ConvNeXt. ([reference](https://arxiv.org/abs/2201.03545)) @@ -8,21 +8,23 @@ Creates a single block of ConvNeXt. - `planes`: number of input channels. - `drop_path_rate`: Stochastic depth rate. - - `λ`: Initial value for [`LayerScale`](#) + - `layerscale_init`: Initial value for [`LayerScale`](#) """ -function convnextblock(planes, drop_path_rate = 0.0, λ = 1.0f-6) +function convnextblock(planes::Integer, drop_path_rate = 0.0, layerscale_init = 1.0f-6) layers = SkipConnection(Chain(DepthwiseConv((7, 7), planes => planes; pad = 3), swapdims((3, 1, 2, 4)), LayerNorm(planes; ϵ = 1.0f-6), mlp_block(planes, 4 * planes), - LayerScale(planes, λ), + LayerScale(planes, layerscale_init), swapdims((2, 3, 1, 4)), DropPath(drop_path_rate)), +) return layers end """ - convnext(depths, planes; inchannels = 3, drop_path_rate = 0., λ = 1f-6, nclasses = 1000) + convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; + drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3, + nclasses::Integer = 1000) Creates the layers for a ConvNeXt model. ([reference](https://arxiv.org/abs/2201.03545)) @@ -33,12 +35,13 @@ Creates the layers for a ConvNeXt model. - `depths`: list with configuration for depth of each block - `planes`: list with configuration for number of output channels in each block - `drop_path_rate`: Stochastic depth rate. 
- - `λ`: Initial value for [`LayerScale`](#) + - `layerscale_init`: Initial value for [`LayerScale`](#) ([reference](https://arxiv.org/abs/2103.17239)) - `nclasses`: number of output classes """ -function convnext(depths, planes; inchannels = 3, drop_path_rate = 0.0, λ = 1.0f-6, - nclasses = 1000) +function convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; + drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3, + nclasses::Integer = 1000) @assert length(depths) == length(planes) "`planes` should have exactly one value for each block" downsample_layers = [] @@ -54,7 +57,9 @@ function convnext(depths, planes; inchannels = 3, drop_path_rate = 0.0, λ = 1.0 dp_rates = linear_scheduler(drop_path_rate; depth = sum(depths)) cur = 0 for i in eachindex(depths) - push!(stages, [convnextblock(planes[i], dp_rates[cur + j], λ) for j in 1:depths[i]]) + push!(stages, + [convnextblock(planes[i], dp_rates[cur + j], layerscale_init) + for j in 1:depths[i]]) cur += depths[i] end backbone = collect(Iterators.flatten(Iterators.flatten(zip(downsample_layers, stages)))) @@ -72,13 +77,8 @@ const CONVNEXT_CONFIGS = Dict(:tiny => ([3, 3, 9, 3], [96, 192, 384, 768]), :large => ([3, 3, 27, 3], [192, 384, 768, 1536]), :xlarge => ([3, 3, 27, 3], [256, 512, 1024, 2048])) -struct ConvNeXt - layers::Any -end -@functor ConvNeXt - """ - ConvNeXt(mode::Symbol = :base; inchannels = 3, drop_path_rate = 0., λ = 1f-6, nclasses = 1000) + ConvNeXt(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvNeXt model. ([reference](https://arxiv.org/abs/2201.03545)) @@ -86,16 +86,18 @@ Creates a ConvNeXt model. # Arguments - `inchannels`: The number of channels in the input. - - `drop_path_rate`: Stochastic depth rate. - - `λ`: Init value for [LayerScale](https://arxiv.org/abs/2103.17239) - `nclasses`: number of output classes See also [`Metalhead.convnext`](#). 
""" -function ConvNeXt(mode::Symbol = :base; inchannels = 3, drop_path_rate = 0.0, λ = 1.0f-6, - nclasses = 1000) +struct ConvNeXt + layers::Any +end +@functor ConvNeXt + +function ConvNeXt(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, keys(CONVNEXT_CONFIGS)) - layers = convnext(CONVNEXT_CONFIGS[mode]...; inchannels, drop_path_rate, λ, nclasses) + layers = convnext(CONVNEXT_CONFIGS[mode]...; inchannels, nclasses) return ConvNeXt(layers) end diff --git a/src/convnets/densenet.jl b/src/convnets/densenet.jl index 332b5551f..0b164e2ab 100644 --- a/src/convnets/densenet.jl +++ b/src/convnets/densenet.jl @@ -10,7 +10,7 @@ Create a Densenet bottleneck layer - `outplanes`: number of output feature maps on bottleneck branch (and scaling factor for inner feature maps; see ref) """ -function dense_bottleneck(inplanes, outplanes) +function dense_bottleneck(inplanes::Integer, outplanes::Integer) inner_channels = 4 * outplanes return SkipConnection(Chain(conv_norm((1, 1), inplanes, inner_channels; bias = false, revnorm = true)..., @@ -30,7 +30,7 @@ Create a DenseNet transition sequence - `inplanes`: number of input feature maps - `outplanes`: number of output feature maps """ -function transition(inplanes, outplanes) +function transition(inplanes::Integer, outplanes::Integer) return Chain(conv_norm((1, 1), inplanes, outplanes; bias = false, revnorm = true)..., MeanPool((2, 2))) end @@ -48,14 +48,14 @@ the number of output feature maps by `growth_rates` with each block - `growth_rates`: the growth (additive) rates of output feature maps after each block (a vector of `k`s from the ref) """ -function dense_block(inplanes, growth_rates) +function dense_block(inplanes::Integer, growth_rates) return [dense_bottleneck(i, o) for (i, o) in zip(inplanes .+ cumsum([0, growth_rates[1:(end - 1)]...]), growth_rates)] end """ - densenet(inplanes, growth_rates; reduction = 0.5, nclasses = 1000) + densenet(inplanes, growth_rates; reduction = 0.5, nclasses::Integer = 1000) Create a DenseNet model ([reference](https://arxiv.org/abs/1608.06993)). @@ -68,9 +68,11 @@ Create a DenseNet model - `reduction`: the factor by which the number of feature maps is scaled across each transition - `nclasses`: the number of output classes """ -function densenet(inplanes, growth_rates; reduction = 0.5, nclasses = 1000) +function densenet(inplanes::Integer, growth_rates; reduction = 0.5, inchannels::Integer = 3, + nclasses::Integer = 1000) layers = [] - append!(layers, conv_norm((7, 7), 3, inplanes; stride = 2, pad = (3, 3), bias = false)) + append!(layers, + conv_norm((7, 7), inchannels, inplanes; stride = 2, pad = (3, 3), bias = false)) push!(layers, MaxPool((3, 3); stride = 2, pad = (1, 1))) outplanes = 0 for (i, rates) in enumerate(growth_rates) @@ -88,7 +90,7 @@ function densenet(inplanes, growth_rates; reduction = 0.5, nclasses = 1000) end """ - densenet(nblocks; growth_rate = 32, reduction = 0.5, nclasses = 1000) + densenet(nblocks; growth_rate = 32, reduction = 0.5, nclasses::Integer = 1000) Create a DenseNet model ([reference](https://arxiv.org/abs/1608.06993)). 
@@ -100,15 +102,15 @@ Create a DenseNet model - `reduction`: the factor by which the number of feature maps is scaled across each transition - `nclasses`: the number of output classes """ -function densenet(nblocks::NTuple{N, <:Integer}; growth_rate = 32, reduction = 0.5, - nclasses = 1000) where {N} +function densenet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, + inchannels::Integer = 3, nclasses::Integer = 1000) return densenet(2 * growth_rate, [fill(growth_rate, n) for n in nblocks]; - reduction = reduction, nclasses = nclasses) + reduction, inchannels, nclasses) end """ - DenseNet(nblocks::NTuple{N, <:Integer}; - growth_rate = 32, reduction = 0.5, nclasses = 1000) + DenseNet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, + inchannels = 3, nclasses::Integer = 1000) Create a DenseNet model ([reference](https://arxiv.org/abs/1608.06993)). @@ -124,29 +126,26 @@ See also [`densenet`](#). struct DenseNet layers::Any end +@functor DenseNet -function DenseNet(nblocks::NTuple{N, <:Integer}; - growth_rate = 32, reduction = 0.5, nclasses = 1000) where {N} - layers = densenet(nblocks; growth_rate = growth_rate, - reduction = reduction, - nclasses = nclasses) +function DenseNet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, + inchannels = 3, nclasses::Integer = 1000) + layers = densenet(nblocks; growth_rate, reduction, inchannels, nclasses) return DenseNet(layers) end -@functor DenseNet - (m::DenseNet)(x) = m.layers(x) backbone(m::DenseNet) = m.layers[1] classifier(m::DenseNet) = m.layers[2] -const DENSENET_CONFIGS = Dict(121 => (6, 12, 24, 16), - 161 => (6, 12, 36, 24), - 169 => (6, 12, 32, 32), - 201 => (6, 12, 48, 32)) +const DENSENET_CONFIGS = Dict(121 => [6, 12, 24, 16], + 161 => [6, 12, 36, 24], + 169 => [6, 12, 32, 32], + 201 => [6, 12, 48, 32]) """ - DenseNet(config::Integer = 121; pretrain = false, nclasses = 1000) + DenseNet(config::Integer = 121; pretrain::Bool = false, nclasses::Integer = 1000) DenseNet(transition_configs::NTuple{N,Integer}) Create a DenseNet model with specified configuration. Currently supported values are (121, 161, 169, 201) @@ -159,7 +158,7 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet. See also [`Metalhead.densenet`](#). """ -function DenseNet(config::Integer = 121; pretrain = false, nclasses = 1000) +function DenseNet(config::Integer = 121; pretrain::Bool = false, nclasses::Integer = 1000) _checkconfig(config, keys(DENSENET_CONFIGS)) model = DenseNet(DENSENET_CONFIGS[config]; nclasses = nclasses) if pretrain diff --git a/src/convnets/efficientnet.jl b/src/convnets/efficientnet.jl index 71e6f8f0a..730840fa4 100644 --- a/src/convnets/efficientnet.jl +++ b/src/convnets/efficientnet.jl @@ -1,6 +1,6 @@ """ - efficientnet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) + efficientnet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). @@ -22,8 +22,8 @@ Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). 
- `max_width`: maximum number of output channels before the fully connected classification blocks """ -function efficientnet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) +function efficientnet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) wscale, dscale = scalings scalew(w) = wscale ≈ 1 ? w : ceil(Int64, wscale * w) scaled(d) = dscale ≈ 1 ? d : ceil(Int64, dscale * d) @@ -36,12 +36,11 @@ function efficientnet(scalings, block_configs; out_channels = _round_channels(scalew(o), 8) repeats = scaled(n) push!(blocks, - invertedresidual((k, k), in_channels, in_channels * e, out_channels, swish; + invertedresidual((k, k), in_channels, out_channels, swish; expansion = e, stride = s, reduction = 4)) for _ in 1:(repeats - 1) push!(blocks, - invertedresidual((k, k), out_channels, out_channels * e, out_channels, - swish; + invertedresidual((k, k), out_channels, out_channels, swish; expansion = e, stride = 1, reduction = 4)) end end @@ -74,6 +73,7 @@ const EFFICIENTNET_BLOCK_CONFIGS = [ # w: width scaling # d: depth scaling # r: image resolution +# Data is organised as (r, (w, d)) const EFFICIENTNET_GLOBAL_CONFIGS = Dict(:b0 => (224, (1.0, 1.0)), :b1 => (240, (1.0, 1.1)), :b2 => (260, (1.1, 1.2)), @@ -84,14 +84,9 @@ const EFFICIENTNET_GLOBAL_CONFIGS = Dict(:b0 => (224, (1.0, 1.0)), :b7 => (600, (2.0, 3.1)), :b8 => (672, (2.2, 3.6))) -struct EfficientNet - layers::Any -end -@functor EfficientNet - """ - EfficientNet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) + EfficientNet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). See also [`efficientnet`](#). @@ -114,8 +109,13 @@ See also [`efficientnet`](#). - `max_width`: maximum number of output channels before the fully connected classification blocks """ -function EfficientNet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) +struct EfficientNet + layers::Any +end +@functor EfficientNet + +function EfficientNet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = efficientnet(scalings, block_configs; inchannels, nclasses, max_width) return EfficientNet(layers) end @@ -126,7 +126,7 @@ backbone(m::EfficientNet) = m.layers[1] classifier(m::EfficientNet) = m.layers[2] """ - EfficientNet(name::Symbol; pretrain = false) + EfficientNet(name::Symbol; pretrain::Bool = false) Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). See also [`efficientnet`](#). @@ -137,7 +137,7 @@ See also [`efficientnet`](#). 
(can be `:b0`, `:b1`, `:b2`, `:b3`, `:b4`, `:b5`, `:b6`, `:b7`, `:b8`) - `pretrain`: set to `true` to load the pre-trained weights for ImageNet """ -function EfficientNet(name::Symbol; pretrain = false) +function EfficientNet(name::Symbol; pretrain::Bool = false) _checkconfig(name, keys(EFFICIENTNET_GLOBAL_CONFIGS)) model = EfficientNet(EFFICIENTNET_GLOBAL_CONFIGS[name][2], EFFICIENTNET_BLOCK_CONFIGS) pretrain && loadpretrain!(model, string("efficientnet-", name)) diff --git a/src/convnets/inception/googlenet.jl b/src/convnets/inception/googlenet.jl index 8a88ca943..90f92ddfc 100644 --- a/src/convnets/inception/googlenet.jl +++ b/src/convnets/inception/googlenet.jl @@ -27,7 +27,7 @@ function _inceptionblock(inplanes, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, end """ - googlenet(; nclasses = 1000) + googlenet(; nclasses::Integer = 1000) Create an Inception-v1 model (commonly referred to as GoogLeNet) ([reference](https://arxiv.org/abs/1409.4842v1)). @@ -36,7 +36,7 @@ Create an Inception-v1 model (commonly referred to as GoogLeNet) - `nclasses`: the number of output classes """ -function googlenet(; nclasses = 1000) +function googlenet(; nclasses::Integer = 1000) layers = Chain(Chain(Conv((7, 7), 3 => 64; stride = 2, pad = 3), MaxPool((3, 3); stride = 2, pad = 1), Conv((1, 1), 64 => 64), @@ -61,7 +61,7 @@ function googlenet(; nclasses = 1000) end """ - GoogLeNet(; pretrain = false, nclasses = 1000) + GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000) Create an Inception-v1 model (commonly referred to as `GoogLeNet`) ([reference](https://arxiv.org/abs/1409.4842v1)). @@ -82,7 +82,7 @@ struct GoogLeNet end @functor GoogLeNet -function GoogLeNet(; pretrain = false, nclasses = 1000) +function GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000) layers = googlenet(; nclasses = nclasses) if pretrain loadpretrain!(layers, "GoogLeNet") diff --git a/src/convnets/inception/inceptionresnetv2.jl b/src/convnets/inception/inceptionresnetv2.jl index 4b4b78706..747da2fb2 100644 --- a/src/convnets/inception/inceptionresnetv2.jl +++ b/src/convnets/inception/inceptionresnetv2.jl @@ -64,7 +64,7 @@ function block8(scale = 1.0f0; activation = identity) end """ - inceptionresnetv2(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Creates an InceptionResNetv2 model. ([reference](https://arxiv.org/abs/1602.07261)) @@ -75,7 +75,8 @@ Creates an InceptionResNetv2 model. - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. """ -function inceptionresnetv2(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) +function inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0, + nclasses::Integer = 1000) body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., conv_norm((3, 3), 32, 32)..., conv_norm((3, 3), 32, 64; pad = 1)..., @@ -97,7 +98,7 @@ function inceptionresnetv2(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000 end """ - InceptionResNetv2(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + InceptionResNetv2(; pretrain::Bool = false, inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Creates an InceptionResNetv2 model. 
([reference](https://arxiv.org/abs/1602.07261)) @@ -118,8 +119,9 @@ struct InceptionResNetv2 end @functor InceptionResNetv2 -function InceptionResNetv2(; pretrain = false, inchannels = 3, dropout_rate = 0.0, - nclasses = 1000) +function InceptionResNetv2(; pretrain::Bool = false, inchannels::Integer = 3, + dropout_rate = 0.0, + nclasses::Integer = 1000) layers = inceptionresnetv2(; inchannels, dropout_rate, nclasses) if pretrain loadpretrain!(layers, "InceptionResNetv2") diff --git a/src/convnets/inception/inceptionv3.jl b/src/convnets/inception/inceptionv3.jl index 68b283838..8d9977d80 100644 --- a/src/convnets/inception/inceptionv3.jl +++ b/src/convnets/inception/inceptionv3.jl @@ -127,7 +127,7 @@ function inceptionv3_e(inplanes) end """ - inceptionv3(; nclasses = 1000) + inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000) Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). @@ -135,8 +135,8 @@ Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). - `nclasses`: the number of output classes """ -function inceptionv3(; nclasses = 1000) - layer = Chain(Chain(conv_norm((3, 3), 3, 32; stride = 2)..., +function inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000) + layer = Chain(Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., conv_norm((3, 3), 32, 32)..., conv_norm((3, 3), 32, 64; pad = 1)..., MaxPool((3, 3); stride = 2), @@ -162,7 +162,7 @@ function inceptionv3(; nclasses = 1000) end """ - Inceptionv3(; pretrain = false, nclasses = 1000) + Inceptionv3(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). See also [`inceptionv3`](#). @@ -170,6 +170,7 @@ See also [`inceptionv3`](#). # Arguments - `pretrain`: set to `true` to load the pre-trained weights for ImageNet + - `inchannels`: number of input channels - `nclasses`: the number of output classes !!! warning @@ -180,8 +181,9 @@ struct Inceptionv3 layers::Any end -function Inceptionv3(; pretrain = false, nclasses = 1000) - layers = inceptionv3(; nclasses = nclasses) +function Inceptionv3(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = inceptionv3(; inchannels, nclasses) if pretrain loadpretrain!(layers, "Inceptionv3") end diff --git a/src/convnets/inception/inceptionv4.jl b/src/convnets/inception/inceptionv4.jl index bb03646ec..b84232fb8 100644 --- a/src/convnets/inception/inceptionv4.jl +++ b/src/convnets/inception/inceptionv4.jl @@ -82,7 +82,7 @@ function inceptionv4_c() end """ - inceptionv4(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + inceptionv4(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Create an Inceptionv4 model. ([reference](https://arxiv.org/abs/1602.07261)) @@ -93,7 +93,8 @@ Create an Inceptionv4 model. - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. 
""" -function inceptionv4(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) +function inceptionv4(; dropout_rate = 0.0, inchannels::Integer = 3, + nclasses::Integer = 1000) body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., conv_norm((3, 3), 32, 32)..., conv_norm((3, 3), 32, 64; pad = 1)..., @@ -122,7 +123,7 @@ function inceptionv4(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) end """ - Inceptionv4(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + Inceptionv4(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates an Inceptionv4 model. ([reference](https://arxiv.org/abs/1602.07261)) @@ -131,7 +132,6 @@ Creates an Inceptionv4 model. - `pretrain`: set to `true` to load the pre-trained weights for ImageNet - `inchannels`: number of input channels. - - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. !!! warning @@ -143,9 +143,9 @@ struct Inceptionv4 end @functor Inceptionv4 -function Inceptionv4(; pretrain = false, inchannels = 3, dropout_rate = 0.0, - nclasses = 1000) - layers = inceptionv4(; inchannels, dropout_rate, nclasses) +function Inceptionv4(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = inceptionv4(; inchannels, nclasses) if pretrain loadpretrain!(layers, "Inceptionv4") end diff --git a/src/convnets/inception/xception.jl b/src/convnets/inception/xception.jl index 3c6d8331a..8d2ad13d8 100644 --- a/src/convnets/inception/xception.jl +++ b/src/convnets/inception/xception.jl @@ -1,6 +1,7 @@ """ - xception_block(inchannels, outchannels, nrepeats; stride = 1, start_with_relu = true, - grow_at_start = true) + xception_block(inchannels::Integer, outchannels::Integer, nrepeats::Integer; + stride::Integer = 1, start_with_relu::Bool = true, + grow_at_start::Bool = true) Create an Xception block. ([reference](https://arxiv.org/abs/1610.02357)) @@ -14,9 +15,9 @@ Create an Xception block. - `start_with_relu`: if true, start the block with a ReLU activation. - `grow_at_start`: if true, increase the number of channels at the first convolution. """ -function xception_block(inchannels, outchannels, nrepeats; stride = 1, - start_with_relu = true, - grow_at_start = true) +function xception_block(inchannels::Integer, outchannels::Integer, nrepeats::Integer; + stride::Integer = 1, start_with_relu::Bool = true, + grow_at_start::Bool = true) if outchannels != inchannels || stride != 1 skip = conv_norm((1, 1), inchannels, outchannels, identity; stride = stride, bias = false) @@ -44,7 +45,7 @@ function xception_block(inchannels, outchannels, nrepeats; stride = 1, end """ - xception(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + xception(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Creates an Xception model. ([reference](https://arxiv.org/abs/1610.02357)) @@ -55,7 +56,7 @@ Creates an Xception model. - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. 
""" -function xception(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) +function xception(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2, bias = false)..., conv_norm((3, 3), 32, 64; bias = false)..., xception_block(64, 128, 2; stride = 2, start_with_relu = false), @@ -70,13 +71,8 @@ function xception(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) return Chain(body, head) end -struct Xception - layers::Any -end -@functor Xception - """ - Xception(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + Xception(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates an Xception model. ([reference](https://arxiv.org/abs/1610.02357)) @@ -85,15 +81,20 @@ Creates an Xception model. - `pretrain`: set to `true` to load the pre-trained weights for ImageNet. - `inchannels`: number of input channels. - - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. !!! warning `Xception` does not currently support pretrained weights. """ -function Xception(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) - layers = xception(; inchannels, dropout_rate, nclasses) +struct Xception + layers::Any +end +@functor Xception + +function Xception(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = xception(; inchannels, nclasses) if pretrain loadpretrain!(layers, "xception") end diff --git a/src/convnets/mobilenet/mobilenetv1.jl b/src/convnets/mobilenet/mobilenetv1.jl index fffa93a4d..e31f8835b 100644 --- a/src/convnets/mobilenet/mobilenetv1.jl +++ b/src/convnets/mobilenet/mobilenetv1.jl @@ -1,8 +1,6 @@ """ - mobilenetv1(width_mult, config; - activation = relu, - inchannels = 3, - nclasses = 1000) + mobilenetv1(width_mult::Number, config::Vector{<:Tuple}; activation = relu, + inchannels::Integer = 3, nclasses::Integer = 1000) Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)). @@ -21,10 +19,8 @@ Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)). - `inchannels`: The number of input channels. The default value is 3. - `nclasses`: The number of output classes """ -function mobilenetv1(width_mult, config; - activation = relu, - inchannels = 3, - nclasses = 1000) +function mobilenetv1(width_mult::Number, config::Vector{<:Tuple}; activation = relu, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = [] for (dw, outch, stride, nrepeats) in config outch = Int(outch * width_mult) @@ -61,7 +57,8 @@ const MOBILENETV1_CONFIGS = [ ] """ - MobileNetv1(width_mult = 1; inchannels = 3, pretrain = false, nclasses = 1000) + MobileNetv1(width_mult = 1; inchannels::Integer = 3, pretrain::Bool = false, + nclasses::Integer = 1000) Create a MobileNetv1 model with the baseline configuration ([reference](https://arxiv.org/abs/1704.04861v1)). 
@@ -83,8 +80,8 @@ struct MobileNetv1 end @functor MobileNetv1 -function MobileNetv1(width_mult::Number = 1; inchannels = 3, pretrain = false, - nclasses = 1000) +function MobileNetv1(width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = mobilenetv1(width_mult, MOBILENETV1_CONFIGS; inchannels, nclasses) if pretrain loadpretrain!(layers, string("MobileNetv1")) diff --git a/src/convnets/mobilenet/mobilenetv2.jl b/src/convnets/mobilenet/mobilenetv2.jl index b97fc16ff..9dd35e9f9 100644 --- a/src/convnets/mobilenet/mobilenetv2.jl +++ b/src/convnets/mobilenet/mobilenetv2.jl @@ -1,5 +1,7 @@ """ - mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000) + mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1280, inchannels::Integer = 3, + nclasses::Integer = 1000) Create a MobileNetv2 model. ([reference](https://arxiv.org/abs/1801.04381)). @@ -20,7 +22,9 @@ Create a MobileNetv2 model. - `max_width`: The maximum number of feature maps in any layer of the network - `nclasses`: The number of output classes """ -function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000) +function mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1280, inchannels::Integer = 3, + nclasses::Integer = 1000) # building first layer inplanes = _round_channels(32 * width_mult, width_mult == 0.1 ? 4 : 8) layers = [] @@ -30,7 +34,7 @@ function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, ncla outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8) for i in 1:n push!(layers, - invertedresidual((3, 3), inplanes, inplanes * t, outplanes, a; + invertedresidual((3, 3), inplanes, outplanes, a; expansion = t, stride = i == 1 ? s : 1)) inplanes = outplanes end @@ -57,13 +61,9 @@ const MOBILENETV2_CONFIGS = [ (6, 320, 1, 1, relu6), ] -struct MobileNetv2 - layers::Any -end -@functor MobileNetv2 - """ - MobileNetv2(width_mult = 1.0; inchannels = 3, pretrain = false, nclasses = 1000) + MobileNetv2(width_mult = 1.0; inchannels::Integer = 3, pretrain::Bool = false, + nclasses::Integer = 1000) Create a MobileNetv2 model with the specified configuration. ([reference](https://arxiv.org/abs/1801.04381)). @@ -74,14 +74,19 @@ Set `pretrain` to `true` to load the pretrained weights for ImageNet. - `width_mult`: Controls the number of output feature maps in each block (with 1.0 being the default in the paper; this is usually a value between 0.1 and 1.4) - - `inchannels`: The number of input channels. - `pretrain`: Whether to load the pre-trained weights for ImageNet + - `inchannels`: The number of input channels. - `nclasses`: The number of output classes See also [`Metalhead.mobilenetv2`](#). 
""" -function MobileNetv2(width_mult::Number = 1; inchannels = 3, pretrain = false, - nclasses = 1000) +struct MobileNetv2 + layers::Any +end +@functor MobileNetv2 + +function MobileNetv2(width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = mobilenetv2(width_mult, MOBILENETV2_CONFIGS; inchannels, nclasses) pretrain && loadpretrain!(layers, string("MobileNetv2")) if pretrain diff --git a/src/convnets/mobilenet/mobilenetv3.jl b/src/convnets/mobilenet/mobilenetv3.jl index d6873ac57..00c0e0139 100644 --- a/src/convnets/mobilenet/mobilenetv3.jl +++ b/src/convnets/mobilenet/mobilenetv3.jl @@ -1,5 +1,7 @@ """ - mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000) + mobilenetv3(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1024, inchannels::Integer = 3, + nclasses::Integer = 1000) Create a MobileNetv3 model. ([reference](https://arxiv.org/abs/1905.02244)). @@ -22,7 +24,9 @@ Create a MobileNetv3 model. - `max_width`: The maximum number of feature maps in any layer of the network - `nclasses`: the number of output classes """ -function mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000) +function mobilenetv3(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1024, inchannels::Integer = 3, + nclasses::Integer = 1000) # building first layer inplanes = _round_channels(16 * width_mult, 8) layers = [] @@ -86,13 +90,9 @@ const MOBILENETV3_CONFIGS = Dict(:small => [ (5, 6, 160, 4, hardswish, 1), ]) -struct MobileNetv3 - layers::Any -end -@functor MobileNetv3 - """ - MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3, pretrain = false, nclasses = 1000) + MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) Create a MobileNetv3 model with the specified configuration. ([reference](https://arxiv.org/abs/1905.02244)). @@ -104,15 +104,20 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet. - `width_mult`: Controls the number of output feature maps in each block (with 1.0 being the default in the paper; this is usually a value between 0.1 and 1.4) - - `inchannels`: The number of channels in the input. - `pretrain`: whether to load the pre-trained weights for ImageNet + - `inchannels`: The number of channels in the input. - `nclasses`: the number of output classes See also [`Metalhead.mobilenetv3`](#). """ -function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3, - pretrain = false, nclasses = 1000) - @assert mode in [:large, :small] "`mode` has to be either :large or :small" +struct MobileNetv3 + layers::Any +end +@functor MobileNetv3 + +function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) + _checkconfig(mode, [:small, :large]) max_width = (mode == :large) ? 
1280 : 1024 layers = mobilenetv3(width_mult, MOBILENETV3_CONFIGS[mode]; inchannels, max_width, nclasses) diff --git a/src/convnets/resnets/core.jl b/src/convnets/resnets/core.jl index 03d96d6db..42f61c54a 100644 --- a/src/convnets/resnets/core.jl +++ b/src/convnets/resnets/core.jl @@ -132,7 +132,7 @@ end # end """ - resnet_stem(; stem_type = :default, inchannels = 3, replace_stem_pool = false, + resnet_stem(; stem_type = :default, inchannels::Integer = 3, replace_stem_pool = false, norm_layer = BatchNorm, activation = relu) Builds a stem to be used in a ResNet model. See the `stem` argument of [`resnet`](#) for details diff --git a/src/convnets/resnets/resnet.jl b/src/convnets/resnets/resnet.jl index fac7e7415..9bf9cd82c 100644 --- a/src/convnets/resnets/resnet.jl +++ b/src/convnets/resnets/resnet.jl @@ -1,5 +1,5 @@ """ - ResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) + ResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ResNet model with the specified depth. ((reference)[https://arxiv.org/abs/1512.03385]) @@ -22,7 +22,8 @@ struct ResNet end @functor ResNet -function ResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) +function ResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) _checkconfig(depth, keys(RESNET_CONFIGS)) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses) if pretrain @@ -37,7 +38,7 @@ backbone(m::ResNet) = m.layers[1] classifier(m::ResNet) = m.layers[2] """ - WideResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) + WideResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a Wide ResNet model with the specified depth. The model is the same as ResNet except for the bottleneck number of channels which is twice larger in every block. @@ -62,7 +63,8 @@ struct WideResNet end @functor WideResNet -function WideResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) +function WideResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) _checkconfig(depth, [50, 101]) layers = resnet(RESNET_CONFIGS[depth]...; base_width = 128, inchannels, nclasses) if pretrain diff --git a/src/convnets/resnets/resnext.jl b/src/convnets/resnets/resnext.jl index 8032df5ab..29d89e3f1 100644 --- a/src/convnets/resnets/resnext.jl +++ b/src/convnets/resnets/resnext.jl @@ -1,6 +1,6 @@ """ - ResNeXt(depth::Integer; pretrain = false, cardinality = 32, - base_width = 4, inchannels = 3, nclasses = 1000) + ResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, + base_width = 4, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ResNeXt model with the specified depth, cardinality, and base width. 
((reference)[https://arxiv.org/abs/1611.05431]) @@ -27,8 +27,8 @@ end (m::ResNeXt)(x) = m.layers(x) -function ResNeXt(depth::Integer; pretrain = false, cardinality = 32, - base_width = 4, inchannels = 3, nclasses = 1000) +function ResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, + base_width = 4, inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(depth, sort(collect(keys(RESNET_CONFIGS)))[3:end]) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses, cardinality, base_width) if pretrain diff --git a/src/convnets/resnets/seresnet.jl b/src/convnets/resnets/seresnet.jl index 05d842173..61eee3aad 100644 --- a/src/convnets/resnets/seresnet.jl +++ b/src/convnets/resnets/seresnet.jl @@ -1,5 +1,5 @@ """ - SEResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) + SEResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a SEResNet model with the specified depth. ((reference)[https://arxiv.org/pdf/1709.01507.pdf]) @@ -24,7 +24,8 @@ end (m::SEResNet)(x) = m.layers(x) -function SEResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) +function SEResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) _checkconfig(depth, keys(RESNET_CONFIGS)) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses, attn_fn = squeeze_excite) @@ -38,8 +39,8 @@ backbone(m::SEResNet) = m.layers[1] classifier(m::SEResNet) = m.layers[2] """ - SEResNeXt(depth::Integer; pretrain = false, cardinality = 32, base_width = 4, - inchannels = 3, nclasses = 1000) + SEResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, base_width = 4, + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a SEResNeXt model with the specified depth, cardinality, and base width. ((reference)[https://arxiv.org/pdf/1709.01507.pdf]) @@ -66,8 +67,8 @@ end (m::SEResNeXt)(x) = m.layers(x) -function SEResNeXt(depth::Integer; pretrain = false, cardinality = 32, base_width = 4, - inchannels = 3, nclasses = 1000) +function SEResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, base_width = 4, + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(depth, sort(collect(keys(RESNET_CONFIGS)))[3:end]) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses, cardinality, base_width, attn_fn = squeeze_excite) diff --git a/src/convnets/squeezenet.jl b/src/convnets/squeezenet.jl index abcdd63f8..3ee6653bc 100644 --- a/src/convnets/squeezenet.jl +++ b/src/convnets/squeezenet.jl @@ -1,5 +1,6 @@ """ - fire(inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes) + fire(inplanes::Integer, squeeze_planes::Integer, expand1x1_planes::Integer, + expand3x3_planes::Integer) Create a fire module ([reference](https://arxiv.org/abs/1602.07360v4)). 
@@ -11,7 +12,8 @@ Create a fire module - `expand1x1_planes`: number of output feature maps for the 1x1 expansion convolution - `expand3x3_planes`: number of output feature maps for the 3x3 expansion convolution """ -function fire(inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes) +function fire(inplanes::Integer, squeeze_planes::Integer, expand1x1_planes::Integer, + expand3x3_planes::Integer) branch_1 = Conv((1, 1), inplanes => squeeze_planes, relu) branch_2 = Conv((1, 1), squeeze_planes => expand1x1_planes, relu) branch_3 = Conv((3, 3), squeeze_planes => expand3x3_planes, relu; pad = 1) @@ -19,13 +21,18 @@ function fire(inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes) end """ - squeezenet() + squeezenet(; inchannels::Integer = 3, nclasses::Integer = 1000) Create a SqueezeNet ([reference](https://arxiv.org/abs/1602.07360v4)). + +# Arguments + + - `inchannels`: number of input channels. + - `nclasses`: the number of output classes. """ -function squeezenet() - return Chain(Chain(Conv((3, 3), 3 => 64, relu; stride = 2), +function squeezenet(; inchannels::Integer = 3, nclasses::Integer = 1000) + return Chain(Chain(Conv((3, 3), inchannels => 64, relu; stride = 2), MaxPool((3, 3); stride = 2), fire(64, 16, 64, 64), fire(128, 16, 64, 64), @@ -38,17 +45,23 @@ function squeezenet() fire(384, 64, 256, 256), fire(512, 64, 256, 256), Dropout(0.5), - Conv((1, 1), 512 => 1000, relu)), + Conv((1, 1), 512 => nclasses, relu)), AdaptiveMeanPool((1, 1)), MLUtils.flatten) end """ - SqueezeNet(; pretrain = false) + SqueezeNet(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) Create a SqueezeNet ([reference](https://arxiv.org/abs/1602.07360v4)). -Set `pretrain=true` to load the model with pre-trained weights for ImageNet. + +# Arguments + + - `pretrain`: set to `true` to load the pre-trained weights for ImageNet + - `inchannels`: number of input channels. + - `nclasses`: the number of output classes. !!! 
warning @@ -61,8 +74,9 @@ struct SqueezeNet end @functor SqueezeNet -function SqueezeNet(; pretrain = false) - layers = squeezenet() +function SqueezeNet(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = squeezenet(; inchannels, nclasses) if pretrain loadpretrain!(layers, "SqueezeNet") end diff --git a/src/convnets/vgg.jl b/src/convnets/vgg.jl index ccfdd2cff..0b6026eb8 100644 --- a/src/convnets/vgg.jl +++ b/src/convnets/vgg.jl @@ -11,7 +11,7 @@ A VGG block of convolution layers - `depth`: number of convolution/convolution + batch norm layers - `batchnorm`: set to `true` to include batch normalization after each convolution """ -function vgg_block(ifilters, ofilters, depth, batchnorm) +function vgg_block(ifilters::Integer, ofilters::Integer, depth::Integer, batchnorm::Bool) k = (3, 3) p = (1, 1) layers = [] @@ -40,7 +40,8 @@ Create VGG convolution layers - `batchnorm`: set to `true` to include batch normalization after each convolution - `inchannels`: number of input channels """ -function vgg_convolutional_layers(config, batchnorm, inchannels) +function vgg_convolutional_layers(config::Vector{<:Tuple}, batchnorm::Bool, + inchannels::Integer) layers = [] ifilters = inchannels for c in config @@ -65,7 +66,8 @@ Create VGG classifier (fully connected) layers - `fcsize`: input and output size of the intermediate fully connected layer - `dropout_rate`: the dropout level between each fully connected layer """ -function vgg_classifier_layers(imsize, nclasses, fcsize, dropout_rate) +function vgg_classifier_layers(imsize::NTuple{3, <:Integer}, nclasses::Integer, + fcsize::Integer, dropout_rate) return Chain(MLUtils.flatten, Dense(Int(prod(imsize)), fcsize, relu), Dropout(dropout_rate), @@ -92,7 +94,8 @@ Create a VGG model (see [`Metalhead.vgg_classifier_layers`](#)) - `dropout_rate`: dropout level between fully connected layers """ -function vgg(imsize; config, inchannels, batchnorm = false, nclasses, fcsize, dropout_rate) +function vgg(imsize::Dims{2}; config, batchnorm::Bool = false, fcsize::Integer = 4096, + dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) conv = vgg_convolutional_layers(config, batchnorm, inchannels) imsize = outputsize(conv, (imsize..., inchannels); padbatch = true)[1:3] class = vgg_classifier_layers(imsize, nclasses, fcsize, dropout_rate) @@ -109,10 +112,6 @@ const VGG_CONFIGS = Dict(11 => :A, 16 => :D, 19 => :E) -struct VGG - layers::Any -end - """ VGG(imsize::Dims{2}; config, inchannels, batchnorm = false, nclasses, fcsize, dropout_rate) @@ -120,46 +119,53 @@ Construct a VGG model with the specified input image size. Typically, the image ## Keyword Arguments: - - `config` : VGG convolutional block configuration. It is defined as a vector of tuples `(output_channels, num_convolutions)` for each block - - `inchannels`::Integer : number of input channels - - `batchnorm`::Bool : set to `true` to use batch normalization after each convolution - - `nclasses`::Integer : number of output classes + - `config` : VGG convolutional block configuration. 
It is defined as a vector of tuples + `(output_channels, num_convolutions)` for each block + - `inchannels`: number of input channels + - `batchnorm`: set to `true` to use batch normalization after each convolution + - `nclasses`: number of output classes - `fcsize`: intermediate fully connected layer size (see [`Metalhead.vgg_classifier_layers`](#)) - `dropout_rate`: dropout level between fully connected layers """ -function VGG(imsize::Dims{2}; config, inchannels, batchnorm = false, nclasses, fcsize, - dropout_rate) - layers = vgg(imsize; config, inchannels, batchnorm, nclasses, fcsize, dropout_rate) - return VGG(layers) +struct VGG + layers::Any end - @functor VGG +function VGG(imsize::Dims{2}; config, batchnorm::Bool = false, dropout_rate = 0.5, + inchannels::Integer = 3, nclasses::Integer = 1000) + layers = vgg(imsize; config, inchannels, batchnorm, nclasses, dropout_rate) + return VGG(layers) +end + (m::VGG)(x) = m.layers(x) backbone(m::VGG) = m.layers[1] classifier(m::VGG) = m.layers[2] """ - VGG(depth::Integer = 16; pretrain = false, batchnorm = false) + VGG(depth::Integer; pretrain::Bool = false, batchnorm::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) -Create a VGG style model with specified `depth`. Available values include (11, 13, 16, 19). +Create a VGG style model with specified `depth`. ([reference](https://arxiv.org/abs/1409.1556v6)). -See also [`VGG`](#). # Arguments + - `depth`: the depth of the VGG model. Must be one of [11, 13, 16, 19]. - `pretrain`: set to `true` to load pre-trained model weights for ImageNet + - `batchnorm`: set to `true` to use batch normalization after each convolution + - `inchannels`: number of input channels + - `nclasses`: number of output classes + +See also [`vgg`](#). """ -function VGG(depth::Integer = 16; pretrain = false, batchnorm = false, nclasses = 1000) +function VGG(depth::Integer; pretrain::Bool = false, batchnorm::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(depth, keys(VGG_CONFIGS)) - model = VGG((224, 224); config = VGG_CONV_CONFIGS[VGG_CONFIGS[depth]], - inchannels = 3, - batchnorm = batchnorm, - nclasses = nclasses, - fcsize = 4096, - dropout_rate = 0.5) + model = VGG((224, 224); config = VGG_CONV_CONFIGS[VGG_CONFIGS[depth]], batchnorm, + inchannels, nclasses) if pretrain && !batchnorm loadpretrain!(model, string("vgg", depth)) elseif pretrain diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 557db23a7..75b40708c 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -23,9 +23,9 @@ Create a convolution + batch normalization pair with activation. - `groups`: groups for the convolution kernel - `bias`, `weight`, `init`: initialization for the convolution kernel (see [`Flux.Conv`](#)) """ -function conv_norm(kernel_size, inplanes::Int, outplanes::Int, activation = relu; - norm_layer = BatchNorm, revnorm = false, preact = false, use_norm = true, - kwargs...) +function conv_norm(kernel_size, inplanes::Integer, outplanes::Integer, activation = relu; + norm_layer = BatchNorm, revnorm::Bool = false, preact::Bool = false, + use_norm::Bool = true, kwargs...) 
if !use_norm if (preact || revnorm) throw(ArgumentError("`preact` only supported with `use_norm = true`")) @@ -60,8 +60,8 @@ end """ depthwise_sep_conv_norm(kernel_size, inplanes, outplanes, activation = relu; - revnorm = false, use_norm = (true, true), - stride = 1, pad = 0, dilation = 1, [bias, weight, init]) + revnorm = false, use_norm = (true, true), + stride = 1, pad = 0, dilation = 1, [bias, weight, init]) Create a depthwise separable convolution chain as used in MobileNetv1. This is sequence of layers: @@ -86,9 +86,11 @@ See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1). - `dilation`: dilation of the first convolution kernel - `bias`, `weight`, `init`: initialization for the convolution kernel (see [`Flux.Conv`](#)) """ -function depthwise_sep_conv_norm(kernel_size, inplanes, outplanes, activation = relu; - norm_layer = BatchNorm, revnorm = false, - use_norm = (true, true), stride = 1, kwargs...) +function depthwise_sep_conv_norm(kernel_size, inplanes::Integer, outplanes::Integer, + activation = relu; norm_layer = BatchNorm, + revnorm::Bool = false, + use_norm::NTuple{2, Bool} = (true, true), + stride::Integer = 1, kwargs...) return vcat(conv_norm(kernel_size, inplanes, inplanes, activation; norm_layer, revnorm, use_norm = use_norm[1], stride, groups = inplanes, kwargs...), diff --git a/src/layers/drop.jl b/src/layers/drop.jl index b4a882cff..f823d5c22 100644 --- a/src/layers/drop.jl +++ b/src/layers/drop.jl @@ -1,5 +1,6 @@ # Generates the mask to be used for `DropBlock` -@inline function _dropblock_mask(rng, x, gamma, clipped_block_size) +@inline function _dropblock_mask(rng, x::AbstractArray{T, 4}, gamma, + clipped_block_size::Integer) where {T} block_mask = rand_like(rng, x) block_mask .= block_mask .< gamma return 1 .- maxpool(block_mask, (clipped_block_size, clipped_block_size); @@ -28,8 +29,8 @@ If you are an end-user, you do not want this function. Use [`DropBlock`](#) inst """ # TODO add experimental `DropBlock` options from timm such as gaussian noise and # more precise `DropBlock` to deal with edges (#188) -function dropblock(rng::AbstractRNG, x::AbstractArray{T, 4}, drop_block_prob, block_size, - gamma_scale) where {T} +function dropblock(rng::AbstractRNG, x::AbstractArray{T, 4}, drop_block_prob, + block_size::Integer, gamma_scale) where {T} H, W, _, _ = size(x) total_size = H * W clipped_block_size = min(block_size, min(H, W)) @@ -100,7 +101,7 @@ size `block_size` in the input. During inference, it simply returns the input `x - `rng`: can be used to pass in a custom RNG instead of the default. Custom RNGs are only supported on the CPU. """ -function DropBlock(drop_block_prob = 0.1, block_size = 7, gamma_scale = 1.0, +function DropBlock(drop_block_prob = 0.1, block_size::Integer = 7, gamma_scale = 1.0, rng = rng_from_array()) if drop_block_prob == 0.0 return identity diff --git a/src/layers/embeddings.jl b/src/layers/embeddings.jl index 3e85f18d9..560ac074d 100644 --- a/src/layers/embeddings.jl +++ b/src/layers/embeddings.jl @@ -1,7 +1,7 @@ _flatten_spatial(x) = permutedims(reshape(x, (:, size(x, 3), size(x, 4))), (2, 1, 3)) """ - PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels = 3, + PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels::Integer = 3, patch_size::Dims{2} = (16, 16), embedplanes = 768, norm_layer = planes -> identity, flatten = true) @@ -19,8 +19,8 @@ patches. 
- `flatten`: set true to flatten the input spatial dimensions after the embedding """ function PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels::Integer = 3, - patch_size::Dims{2} = (16, 16), embedplanes = 768, - norm_layer = planes -> identity, flatten = true) + patch_size::Dims{2} = (16, 16), embedplanes::Integer = 768, + norm_layer = planes -> identity, flatten::Bool = true) im_height, im_width = imsize patch_height, patch_width = patch_size @@ -33,13 +33,15 @@ function PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels::Integer = 3, end """ - ViPosEmbedding(embedsize::Integer, npatches::Integer; init = (dims::Dims{2}) -> rand(Float32, dims)) + ViPosEmbedding(embedsize::Integer, npatches::Integer; + init = (dims::Dims{2}) -> rand(Float32, dims)) Positional embedding layer used by many vision transformer-like models. """ struct ViPosEmbedding{T} vectors::T end +@functor ViPosEmbedding function ViPosEmbedding(embedsize::Integer, npatches::Integer; init = (dims::Dims{2}) -> rand(Float32, dims)) @@ -48,22 +50,20 @@ end (p::ViPosEmbedding)(x) = x .+ p.vectors -@functor ViPosEmbedding - """ - ClassTokens(dim; init = Flux.zeros32) + ClassTokens(planes::Integer; init = Flux.zeros32) -Appends class tokens to an input with embedding dimension `dim` for use in many vision transformer models. +Appends class tokens to an input with embedding dimension `planes` for use in many +vision transformer models. """ struct ClassTokens{T} token::T end +@functor ClassTokens -ClassTokens(dim::Integer; init = Flux.zeros32) = ClassTokens(init(dim, 1, 1)) +ClassTokens(planes::Integer; init = Flux.zeros32) = ClassTokens(init(planes, 1, 1)) function (m::ClassTokens)(x::AbstractArray{T, 3}) where {T} tokens = m.token .* MLUtils.ones_like(x, T, (1, 1, size(x, 3))) return hcat(tokens, x) end - -@functor ClassTokens diff --git a/src/layers/mlp.jl b/src/layers/mlp.jl index a3bdb0fb5..3a1c27413 100644 --- a/src/layers/mlp.jl +++ b/src/layers/mlp.jl @@ -47,8 +47,9 @@ end gated_mlp_block(::typeof(identity), args...; kwargs...) = mlp_block(args...; kwargs...) """ - create_classifier(inplanes, nclasses; pool_layer = AdaptiveMeanPool((1, 1)), - dropout_rate = 0.0, use_conv = false) + create_classifier(inplanes::Integer, nclasses::Integer; + pool_layer = AdaptiveMeanPool((1, 1)), + dropout_rate = 0.0, use_conv::Bool = false) Creates a classifier head to be used for models. @@ -61,8 +62,9 @@ Creates a classifier head to be used for models. - `dropout_rate`: dropout rate used in the classifier head. - `use_conv`: whether to use a 1x1 convolutional layer instead of a `Dense` layer. """ -function create_classifier(inplanes, nclasses; pool_layer = AdaptiveMeanPool((1, 1)), - dropout_rate = 0.0, use_conv = false) +function create_classifier(inplanes::Integer, nclasses::Integer; + pool_layer = AdaptiveMeanPool((1, 1)), + dropout_rate = 0.0, use_conv::Bool = false) # Pooling if pool_layer === identity @assert use_conv diff --git a/src/layers/pool.jl b/src/layers/pool.jl index 1962ab0fb..049c06451 100644 --- a/src/layers/pool.jl +++ b/src/layers/pool.jl @@ -1,5 +1,6 @@ """ - AdaptiveMeanMaxPool(output_size = (1, 1); connection = +) + AdaptiveMeanMaxPool(connection = +, output_size::Tuple = (1, 1)) + AdaptiveMeanMaxPool(output_size::Tuple = (1, 1)) A type of adaptive pooling layer which uses both mean and max pooling and combines them to produce a single output. Note that this is equivalent to @@ -10,7 +11,7 @@ produce a single output. Note that this is equivalent to - `output_size`: The size of the output after pooling. 
- `connection`: The connection type to use. """ -function AdaptiveMeanMaxPool(connection, output_size = (1, 1)) +function AdaptiveMeanMaxPool(connection, output_size::Tuple = (1, 1)) return Parallel(connection, AdaptiveMeanPool(output_size), AdaptiveMaxPool(output_size)) end AdaptiveMeanMaxPool(output_size::Tuple = (1, 1)) = AdaptiveMeanMaxPool(+, output_size) diff --git a/src/layers/scale.jl b/src/layers/scale.jl index 965b50f38..f3a555b76 100644 --- a/src/layers/scale.jl +++ b/src/layers/scale.jl @@ -9,7 +9,7 @@ _input_scale(λ, activation, x) = activation.(λ .* x) _input_scale(λ, ::typeof(identity), x) = λ .* x """ - LayerScale(λ, planes::Integer) + LayerScale(planes::Integer, λ) Creates a `Flux.Scale` layer that performs "`LayerScale`" ([reference](https://arxiv.org/abs/2103.17239)). diff --git a/src/layers/selayers.jl b/src/layers/selayers.jl index db0f3715d..0756225ba 100644 --- a/src/layers/selayers.jl +++ b/src/layers/selayers.jl @@ -15,9 +15,9 @@ Creates a squeeze-and-excitation layer used in MobileNets and SE-Nets. - `norm_layer`: The normalization layer to be used after the convolution layers - `rd_planes`: The number of hidden feature maps in a squeeze and excite layer """ -function squeeze_excite(inplanes; reduction = 16, rd_divisor = 8, - activation = relu, gate_activation = sigmoid, - norm_layer = planes -> identity, +function squeeze_excite(inplanes::Integer; reduction::Integer = 16, + rd_divisor::Integer = 8, activation = relu, + gate_activation = sigmoid, norm_layer = planes -> identity, rd_planes = _round_channels(inplanes ÷ reduction, rd_divisor, 0)) layers = [AdaptiveMeanPool((1, 1)), Conv((1, 1), inplanes => rd_planes), @@ -40,7 +40,7 @@ Effective squeeze-and-excitation layer. - `inplanes`: The number of input feature maps - `gate_activation`: The activation function for the gate layer """ -function effective_squeeze_excite(inplanes; gate_activation = sigmoid, kwargs...) +function effective_squeeze_excite(inplanes::Integer; gate_activation = sigmoid) return SkipConnection(Chain(AdaptiveMeanPool((1, 1)), Conv((1, 1), inplanes, inplanes), gate_activation), .*) diff --git a/src/mixers/core.jl b/src/mixers/core.jl index 9f9d3b305..18f66aaa8 100644 --- a/src/mixers/core.jl +++ b/src/mixers/core.jl @@ -1,7 +1,7 @@ """ - mlpmixer(block, imsize::Dims{2} = (224, 224); inchannels = 3, norm_layer = LayerNorm, + mlpmixer(block, imsize::Dims{2} = (224, 224); inchannels::Integer = 3, norm_layer = LayerNorm, patch_size::Dims{2} = (16, 16), embedplanes = 512, drop_path_rate = 0., - depth = 12, nclasses = 1000, kwargs...) + depth = 12, nclasses::Integer = 1000, kwargs...) Creates a model with the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)). @@ -21,10 +21,9 @@ Creates a model with the MLPMixer architecture. - `kwargs`: additional arguments (if any) to pass to the mixer block. Will use the defaults if not specified. """ -function mlpmixer(block, imsize::Dims{2} = (224, 224); inchannels = 3, - norm_layer = LayerNorm, patch_size::Dims{2} = (16, 16), - embedplanes = 512, drop_path_rate = 0.0, - depth = 12, nclasses = 1000, kwargs...) +function mlpmixer(block, imsize::Dims{2} = (224, 224); norm_layer = LayerNorm, + patch_size::Dims{2} = (16, 16), embedplanes = 512, drop_path_rate = 0.0, + depth = 12, inchannels::Integer = 3, nclasses::Integer = 1000, kwargs...) 
npatches = prod(imsize .÷ patch_size) dp_rates = linear_scheduler(drop_path_rate; depth) layers = Chain(PatchEmbedding(imsize; inchannels, patch_size, embedplanes), diff --git a/src/mixers/gmlp.jl b/src/mixers/gmlp.jl index 9ebd2dce3..df4a52b70 100644 --- a/src/mixers/gmlp.jl +++ b/src/mixers/gmlp.jl @@ -42,9 +42,9 @@ function (m::SpatialGatingUnit)(x) end """ - spatial_gating_block(planes, npatches; mlp_ratio = 4.0, mlp_layer = gated_mlp_block, - norm_layer = LayerNorm, dropout_rate = 0.0, drop_path_rate = 0.0, - activation = gelu) + spatial_gating_block(planes::Integer, npatches::Integer; mlp_ratio = 4.0, + norm_layer = LayerNorm, mlp_layer = gated_mlp_block, + dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) Creates a feedforward block based on the gMLP model architecture described in the paper. ([reference](https://arxiv.org/abs/2105.08050)) @@ -60,10 +60,9 @@ Creates a feedforward block based on the gMLP model architecture described in th - `drop_path_rate`: Stochastic depth rate - `activation`: the activation function to use in the MLP blocks """ -function spatial_gating_block(planes, npatches; mlp_ratio = 4.0, norm_layer = LayerNorm, - mlp_layer = gated_mlp_block, dropout_rate = 0.0, - drop_path_rate = 0.0, - activation = gelu) +function spatial_gating_block(planes::Integer, npatches::Integer; mlp_ratio = 4.0, + norm_layer = LayerNorm, mlp_layer = gated_mlp_block, + dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) channelplanes = Int(mlp_ratio * planes) sgu = inplanes -> SpatialGatingUnit(inplanes, npatches; norm_layer) return SkipConnection(Chain(norm_layer(planes), @@ -72,14 +71,9 @@ function spatial_gating_block(planes, npatches; mlp_ratio = 4.0, norm_layer = La DropPath(drop_path_rate)), +) end -struct gMLP - layers::Any -end -@functor gMLP - """ - gMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0., nclasses = 1000) + gMLP(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the gMLP architecture. ([reference](https://arxiv.org/abs/2105.08050)). @@ -89,18 +83,23 @@ Creates a model with the gMLP architecture. - `size`: the size of the model - one of `small`, `base`, `large` or `huge` - `patch_size`: the size of the patches - `imsize`: the size of the input image - - `drop_path_rate`: Stochastic depth rate + - `inchannels`: the number of input channels - `nclasses`: number of output classes See also [`Metalhead.mlpmixer`](#). 
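For orientation, a minimal usage sketch of the constructor documented here (illustrative only, not part of the diff; it assumes `gMLP` is exported, as the tests in this series imply):

```julia
using Metalhead

# :small is one of the size symbols looked up in MIXER_CONFIGS
m = gMLP(:small; imsize = (224, 224), patch_size = (16, 16), nclasses = 1000)
```

Note that with this change the size symbol no longer has a default, so it must always be passed explicitly.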
""" -function gMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0.0, nclasses = 1000) +struct gMLP + layers::Any +end +@functor gMLP + +function gMLP(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(size, keys(MIXER_CONFIGS)) depth = MIXER_CONFIGS[size][:depth] embedplanes = MIXER_CONFIGS[size][:planes] - layers = mlpmixer(spatial_gating_block, imsize; mlp_layer = gated_mlp_block, - patch_size, embedplanes, drop_path_rate, depth, nclasses) + layers = mlpmixer(spatial_gating_block, imsize; mlp_layer = gated_mlp_block, patch_size, + embedplanes, depth, inchannels, nclasses) return gMLP(layers) end diff --git a/src/mixers/mlpmixer.jl b/src/mixers/mlpmixer.jl index 7b6d4aa09..06aefbd48 100644 --- a/src/mixers/mlpmixer.jl +++ b/src/mixers/mlpmixer.jl @@ -1,6 +1,7 @@ """ - mixerblock(planes, npatches; mlp_ratio = (0.5, 4.0), mlp_layer = mlp_block, - dropout_rate = 0., drop_path_rate = 0., activation = gelu) + mixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, + mlp_ratio = (0.5, 4.0), dropout_rate = 0.0, drop_path_rate = 0.0, + activation = gelu) Creates a feedforward block for the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)) @@ -16,9 +17,10 @@ Creates a feedforward block for the MLPMixer architecture. - `drop_path_rate`: Stochastic depth rate - `activation`: the activation function to use in the MLP blocks """ -function mixerblock(planes, npatches; mlp_ratio = (0.5, 4.0), mlp_layer = mlp_block, - dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) - tokenplanes, channelplanes = [Int(r * planes) for r in mlp_ratio] +function mixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, + mlp_ratio::NTuple{2, Number} = (0.5, 4.0), dropout_rate = 0.0, + drop_path_rate = 0.0, activation = gelu) + tokenplanes, channelplanes = Int.(planes .* mlp_ratio) return Chain(SkipConnection(Chain(LayerNorm(planes), swapdims((2, 1, 3)), mlp_layer(npatches, tokenplanes; activation, @@ -31,14 +33,9 @@ function mixerblock(planes, npatches; mlp_ratio = (0.5, 4.0), mlp_layer = mlp_bl DropPath(drop_path_rate)), +)) end -struct MLPMixer - layers::Any -end -@functor MLPMixer - """ - MLPMixer(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0., nclasses = 1000) +MLPMixer(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)). @@ -49,17 +46,22 @@ Creates a model with the MLPMixer architecture. - `patch_size`: the size of the patches - `imsize`: the size of the input image - `drop_path_rate`: Stochastic depth rate + - `inchannels`: the number of input channels - `nclasses`: number of output classes See also [`Metalhead.mlpmixer`](#). 
""" -function MLPMixer(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0.0, nclasses = 1000) +struct MLPMixer + layers::Any +end +@functor MLPMixer + +function MLPMixer(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(size, keys(MIXER_CONFIGS)) depth = MIXER_CONFIGS[size][:depth] embedplanes = MIXER_CONFIGS[size][:planes] - layers = mlpmixer(mixerblock, imsize; patch_size, embedplanes, depth, drop_path_rate, - nclasses) + layers = mlpmixer(mixerblock, imsize; patch_size, embedplanes, depth, inchannels,nclasses) return MLPMixer(layers) end diff --git a/src/mixers/resmlp.jl b/src/mixers/resmlp.jl index 17e340310..f2c9ece15 100644 --- a/src/mixers/resmlp.jl +++ b/src/mixers/resmlp.jl @@ -1,6 +1,6 @@ """ resmixerblock(planes, npatches; dropout_rate = 0., drop_path_rate = 0., mlp_ratio = 4.0, - activation = gelu, λ = 1e-4) + activation = gelu, layerscale_init = 1e-4) Creates a block for the ResMixer architecture. ([reference](https://arxiv.org/abs/2105.03404)). @@ -15,33 +15,28 @@ Creates a block for the ResMixer architecture. - `dropout_rate`: the dropout rate to use in the MLP blocks - `drop_path_rate`: Stochastic depth rate - `activation`: the activation function to use in the MLP blocks - - `λ`: initialisation constant for the LayerScale + - `layerscale_init`: initialisation constant for the LayerScale """ -function resmixerblock(planes, npatches; mlp_ratio = 4.0, mlp_layer = mlp_block, - dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu, - λ = 1e-4) +function resmixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, + mlp_ratio = 4.0, layerscale_init = 1e-4, dropout_rate = 0.0, + drop_path_rate = 0.0, activation = gelu) return Chain(SkipConnection(Chain(Flux.Scale(planes), swapdims((2, 1, 3)), Dense(npatches, npatches), swapdims((2, 1, 3)), - LayerScale(planes, λ), + LayerScale(planes, layerscale_init), DropPath(drop_path_rate)), +), SkipConnection(Chain(Flux.Scale(planes), mlp_layer(planes, Int(mlp_ratio * planes); dropout_rate, activation), - LayerScale(planes, λ), + LayerScale(planes, layerscale_init), DropPath(drop_path_rate)), +)) end -struct ResMLP - layers::Any -end -@functor ResMLP - """ - ResMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), - drop_path_rate = 0., nclasses = 1000) + ResMLP(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the ResMLP architecture. ([reference](https://arxiv.org/abs/2105.03404)). @@ -51,18 +46,23 @@ Creates a model with the ResMLP architecture. - `size`: the size of the model - one of `small`, `base`, `large` or `huge` - `patch_size`: the size of the patches - `imsize`: the size of the input image - - `drop_path_rate`: Stochastic depth rate + - `inchannels`: the number of input channels - `nclasses`: number of output classes See also [`Metalhead.mlpmixer`](#). 
""" -function ResMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0.0, nclasses = 1000) +struct ResMLP + layers::Any +end +@functor ResMLP + +function ResMLP(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(size, keys(MIXER_CONFIGS)) depth = MIXER_CONFIGS[size][:depth] embedplanes = MIXER_CONFIGS[size][:planes] layers = mlpmixer(resmixerblock, imsize; mlp_ratio = 4.0, patch_size, embedplanes, - drop_path_rate, depth, nclasses) + depth, inchannels, nclasses) return ResMLP(layers) end diff --git a/src/vit-based/vit.jl b/src/vit-based/vit.jl index 1fece2191..1c049e46e 100644 --- a/src/vit-based/vit.jl +++ b/src/vit-based/vit.jl @@ -12,7 +12,8 @@ Transformer as used in the base ViT architecture. - `mlp_ratio`: ratio of MLP layers to the number of input channels - `dropout_rate`: dropout rate """ -function transformer_encoder(planes, depth, nheads; mlp_ratio = 4.0, dropout_rate = 0.0) +function transformer_encoder(planes::Integer, depth::Integer, nheads::Integer; + mlp_ratio = 4.0, dropout_rate = 0.0) layers = [Chain(SkipConnection(prenorm(planes, MHAttention(planes, nheads; attn_dropout_rate = dropout_rate, @@ -26,9 +27,9 @@ function transformer_encoder(planes, depth, nheads; mlp_ratio = 4.0, dropout_rat end """ - vit(imsize::Dims{2} = (256, 256); inchannels = 3, patch_size::Dims{2} = (16, 16), + vit(imsize::Dims{2} = (256, 256); inchannels::Integer = 3, patch_size::Dims{2} = (16, 16), embedplanes = 768, depth = 6, nheads = 16, mlp_ratio = 4.0, dropout_rate = 0.1, - emb_dropout_rate = 0.1, pool = :class, nclasses = 1000) + emb_dropout_rate = 0.1, pool = :class, nclasses::Integer = 1000) Creates a Vision Transformer (ViT) model. ([reference](https://arxiv.org/abs/2010.11929)). @@ -47,9 +48,10 @@ Creates a Vision Transformer (ViT) model. - `pool`: pooling type, either :class or :mean - `nclasses`: number of classes in the output """ -function vit(imsize::Dims{2} = (256, 256); inchannels = 3, patch_size::Dims{2} = (16, 16), - embedplanes = 768, depth = 6, nheads = 16, mlp_ratio = 4.0, dropout_rate = 0.1, - emb_dropout_rate = 0.1, pool = :class, nclasses = 1000) +function vit(imsize::Dims{2} = (256, 256); inchannels::Integer = 3, + patch_size::Dims{2} = (16, 16), embedplanes::Integer = 768, + depth::Integer = 6, nheads::Integer = 16, mlp_ratio = 4.0, dropout_rate = 0.1, + emb_dropout_rate = 0.1, pool::Symbol = :class, nclasses::Integer = 1000) @assert pool in [:class, :mean] "Pool type must be either `:class` (class token) or `:mean` (mean pooling)" npatches = prod(imsize .÷ patch_size) @@ -74,8 +76,8 @@ const VIT_CONFIGS = Dict(:tiny => (depth = 12, embedplanes = 192, nheads = 3), mlp_ratio = 64 // 13)) """ - ViT(mode::Symbol = base; imsize::Dims{2} = (256, 256), inchannels = 3, - patch_size::Dims{2} = (16, 16), pool = :class, nclasses = 1000) + ViT(mode::Symbol = base; imsize::Dims{2} = (256, 256), inchannels::Integer = 3, + patch_size::Dims{2} = (16, 16), pool = :class, nclasses::Integer = 1000) Creates a Vision Transformer (ViT) model. ([reference](https://arxiv.org/abs/2010.11929)). 
@@ -97,11 +99,11 @@ struct ViT end @functor ViT -function ViT(mode::Symbol = :base; imsize::Dims{2} = (256, 256), inchannels = 3, - patch_size::Dims{2} = (16, 16), pool = :class, nclasses = 1000) +function ViT(mode::Symbol = :base; imsize::Dims{2} = (256, 256), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, keys(VIT_CONFIGS)) kwargs = VIT_CONFIGS[mode] - layers = vit(imsize; inchannels, patch_size, nclasses, pool, kwargs...) + layers = vit(imsize; inchannels, patch_size, nclasses, kwargs...) return ViT(layers) end diff --git a/test/convnets.jl b/test/convnets.jl index 40f5ec75a..501ff1be4 100644 --- a/test/convnets.jl +++ b/test/convnets.jl @@ -258,15 +258,12 @@ end end end - @testset "ConvNeXt" verbose = true begin @testset for mode in [:small, :base, :large, :tiny, :xlarge] - @testset for drop_path_rate in [0.0, 0.5] - m = ConvNeXt(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end + m = ConvNeXt(mode) + @test size(m(x_224)) == (1000, 1) + @test gradtest(m, x_224) + _gc() end end diff --git a/test/mixers.jl b/test/mixers.jl index 885ff5838..51cdd736e 100644 --- a/test/mixers.jl +++ b/test/mixers.jl @@ -1,32 +1,8 @@ -@testset "MLPMixer" begin - @testset for mode in [:small, :base, :large] #:huge] - @testset for drop_path_rate in [0.0, 0.5] - m = MLPMixer(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end - end -end - -@testset "ResMLP" begin - @testset for mode in [:small, :base, :large] #:huge] - @testset for drop_path_rate in [0.0, 0.5] - m = ResMLP(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end - end -end - -@testset "gMLP" begin - @testset for mode in [:small, :base, :large] #:huge] - @testset for drop_path_rate in [0.0, 0.5] - m = gMLP(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end +@testset for model in [MLPMixer, ResMLP, gMLP] + @testset for mode in [:small, :base, :large] + m = model(mode) + @test size(m(x_224)) == (1000, 1) + @test gradtest(m, x_224) + _gc() end end From 061b1331c1ce9001f061068f48a2a7c6f4fc604d Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Mon, 1 Aug 2022 07:48:20 +0530 Subject: [PATCH 3/8] Refine `invertedresidual` --- src/convnets/efficientnet.jl | 5 +++-- src/convnets/mobilenet/mobilenetv2.jl | 2 +- src/convnets/mobilenet/mobilenetv3.jl | 2 +- src/layers/conv.jl | 19 ++++++++++++------- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/convnets/efficientnet.jl b/src/convnets/efficientnet.jl index 4321e9443..71e6f8f0a 100644 --- a/src/convnets/efficientnet.jl +++ b/src/convnets/efficientnet.jl @@ -36,11 +36,12 @@ function efficientnet(scalings, block_configs; out_channels = _round_channels(scalew(o), 8) repeats = scaled(n) push!(blocks, - invertedresidual(k, in_channels, in_channels * e, out_channels, swish; + invertedresidual((k, k), in_channels, in_channels * e, out_channels, swish; stride = s, reduction = 4)) for _ in 1:(repeats - 1) push!(blocks, - invertedresidual(k, out_channels, out_channels * e, out_channels, swish; + invertedresidual((k, k), out_channels, out_channels * e, out_channels, + swish; stride = 1, reduction = 4)) end end diff --git a/src/convnets/mobilenet/mobilenetv2.jl b/src/convnets/mobilenet/mobilenetv2.jl index a97e7dda1..b97fc16ff 100644 --- a/src/convnets/mobilenet/mobilenetv2.jl +++ 
b/src/convnets/mobilenet/mobilenetv2.jl @@ -30,7 +30,7 @@ function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, ncla outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8) for i in 1:n push!(layers, - invertedresidual(3, inplanes, inplanes * t, outplanes, a; + invertedresidual((3, 3), inplanes, inplanes * t, outplanes, a; stride = i == 1 ? s : 1)) inplanes = outplanes end diff --git a/src/convnets/mobilenet/mobilenetv3.jl b/src/convnets/mobilenet/mobilenetv3.jl index d8666c5f3..d6873ac57 100644 --- a/src/convnets/mobilenet/mobilenetv3.jl +++ b/src/convnets/mobilenet/mobilenetv3.jl @@ -36,7 +36,7 @@ function mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, ncla outplanes = _round_channels(c * width_mult, 8) explanes = _round_channels(inplanes * t, 8) push!(layers, - invertedresidual(k, inplanes, explanes, outplanes, a; + invertedresidual((k, k), inplanes, explanes, outplanes, a; stride = s, reduction = r)) inplanes = outplanes end diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 5610d3be2..557db23a7 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -114,16 +114,17 @@ Create a basic inverted residual block for MobileNet variants - `reduction`: The reduction factor for the number of hidden feature maps in a squeeze and excite layer (see [`squeeze_excite`](#)). """ -function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, - activation = relu; stride, reduction = nothing) +function invertedresidual(kernel_size, inplanes::Integer, hidden_planes::Integer, + outplanes::Integer, activation = relu; stride::Integer, + reduction::Union{Nothing, Integer} = nothing) @assert stride in [1, 2] "`stride` has to be 1 or 2" pad = @. (kernel_size - 1) ÷ 2 - conv1 = (inplanes == hidden_planes) ? identity : - Chain(conv_norm((1, 1), inplanes, hidden_planes, activation; bias = false)) + conv1 = (inplanes == hidden_planes) ? (identity,) : + conv_norm((1, 1), inplanes, hidden_planes, activation; bias = false) selayer = isnothing(reduction) ? identity : squeeze_excite(hidden_planes; reduction, activation, gate_activation = hardσ, norm_layer = BatchNorm) - invres = Chain(conv1, + invres = Chain(conv1..., conv_norm(kernel_size, hidden_planes, hidden_planes, activation; bias = false, stride, pad = pad, groups = hidden_planes)..., selayer, @@ -131,6 +132,10 @@ function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, return (stride == 1 && inplanes == outplanes) ? SkipConnection(invres, +) : invres end -function invertedresidual(kernel_size::Integer, args...; kwargs...) - return invertedresidual((kernel_size, kernel_size), args...; kwargs...) +function invertedresidual(kernel_size, inplanes::Integer, outplanes::Integer, + activation = relu; stride::Integer, expansion, + reduction::Union{Nothing, Integer} = nothing) + hidden_planes = Int(inplanes * expansion) + return invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activation; + stride, reduction) end From 4e46d7b9db108e95417a104388d981bb0a71fe92 Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Mon, 1 Aug 2022 11:10:48 +0530 Subject: [PATCH 4/8] Expose `inchannels` and `nclasses` for every model Also a. more type annotations b. 
Expose only configurations vital to the model API in terms of pretraining at the highest level --- .github/workflows/CI.yml | 3 +- src/convnets/alexnet.jl | 12 ++--- src/convnets/convmixer.jl | 20 +++---- src/convnets/convnext.jl | 42 ++++++++------- src/convnets/densenet.jl | 49 +++++++++-------- src/convnets/efficientnet.jl | 36 ++++++------- src/convnets/inception/googlenet.jl | 8 +-- src/convnets/inception/inceptionresnetv2.jl | 12 +++-- src/convnets/inception/inceptionv3.jl | 14 ++--- src/convnets/inception/inceptionv4.jl | 14 ++--- src/convnets/inception/xception.jl | 33 ++++++------ src/convnets/mobilenet/mobilenetv1.jl | 19 +++---- src/convnets/mobilenet/mobilenetv2.jl | 29 +++++----- src/convnets/mobilenet/mobilenetv3.jl | 29 +++++----- src/convnets/resnets/core.jl | 2 +- src/convnets/resnets/resnet.jl | 10 ++-- src/convnets/resnets/resnext.jl | 8 +-- src/convnets/resnets/seresnet.jl | 13 ++--- src/convnets/squeezenet.jl | 34 ++++++++---- src/convnets/vgg.jl | 60 +++++++++++---------- src/layers/conv.jl | 18 ++++--- src/layers/drop.jl | 9 ++-- src/layers/embeddings.jl | 22 ++++---- src/layers/mlp.jl | 10 ++-- src/layers/pool.jl | 5 +- src/layers/scale.jl | 2 +- src/layers/selayers.jl | 8 +-- src/mixers/core.jl | 11 ++-- src/mixers/gmlp.jl | 37 +++++++------ src/mixers/mlpmixer.jl | 34 ++++++------ src/mixers/resmlp.jl | 36 ++++++------- src/vit-based/vit.jl | 24 +++++---- test/convnets.jl | 11 ++-- test/mixers.jl | 36 +++---------- 34 files changed, 363 insertions(+), 347 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 8de5bd6e0..c13f1c2d6 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -34,8 +34,7 @@ jobs: - '"Inception"' - '"DenseNet"' - '["ConvNeXt", "ConvMixer"]' - - 'r"ViTs"' - - 'r"Mixers"' + - '[r"ViTs", r"Mixers"]' steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/src/convnets/alexnet.jl b/src/convnets/alexnet.jl index 8ff65ffef..75ba5ad48 100644 --- a/src/convnets/alexnet.jl +++ b/src/convnets/alexnet.jl @@ -1,5 +1,5 @@ """ - alexnet(; nclasses = 1000) + alexnet(; nclasses::Integer = 1000) Create an AlexNet model ([reference](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)). @@ -8,8 +8,8 @@ Create an AlexNet model - `nclasses`: the number of output classes """ -function alexnet(; nclasses = 1000) - layers = Chain(Chain(Conv((11, 11), 3 => 64, relu; stride = (4, 4), pad = (2, 2)), +function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000) + layers = Chain(Chain(Conv((11, 11), inchannels => 64, relu; stride = (4, 4), pad = (2, 2)), MaxPool((3, 3); stride = (2, 2)), Conv((5, 5), 64 => 192, relu; pad = (2, 2)), MaxPool((3, 3); stride = (2, 2)), @@ -28,7 +28,7 @@ function alexnet(; nclasses = 1000) end """ - AlexNet(; pretrain = false, nclasses = 1000) + AlexNet(; pretrain::Bool = false, nclasses::Integer = 1000) Create a `AlexNet`. See also [`alexnet`](#). 
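A minimal sketch of the updated `AlexNet` interface (illustrative, not part of the diff; AlexNet's fully connected head is sized for 224×224 inputs):

```julia
using Metalhead

m = AlexNet(; nclasses = 10)
x = rand(Float32, 224, 224, 3, 8)   # WHCN batch of eight 224×224 RGB images
size(m(x))                          # expected: (10, 8)
```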
@@ -47,8 +47,8 @@ struct AlexNet end @functor AlexNet -function AlexNet(; pretrain = false, nclasses = 1000) - layers = alexnet(; nclasses = nclasses) +function AlexNet(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) + layers = alexnet(; inchannels, nclasses) if pretrain loadpretrain!(layers, "AlexNet") end diff --git a/src/convnets/convmixer.jl b/src/convnets/convmixer.jl index aa3d144d2..c75303184 100644 --- a/src/convnets/convmixer.jl +++ b/src/convnets/convmixer.jl @@ -1,6 +1,7 @@ """ - convmixer(planes, depth; inchannels = 3, kernel_size = (9, 9), patch_size::Dims{2} = 7, - activation = gelu, nclasses = 1000) + convmixer(planes::Integer, depth::Integer; kernel_size = (9, 9), + patch_size::Dims{2} = (7, 7), activation = gelu, + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvMixer model. ([reference](https://arxiv.org/abs/2201.09792)) @@ -9,14 +10,15 @@ Creates a ConvMixer model. - `planes`: number of planes in the output of each block - `depth`: number of layers - - `inchannels`: The number of channels in the input. - `kernel_size`: kernel size of the convolutional layers - `patch_size`: size of the patches - `activation`: activation function used after the convolutional layers + - `inchannels`: The number of channels in the input. - `nclasses`: number of classes in the output """ -function convmixer(planes, depth; inchannels = 3, kernel_size = (9, 9), - patch_size::Dims{2} = (7, 7), activation = gelu, nclasses = 1000) +function convmixer(planes::Integer, depth::Integer; kernel_size = (9, 9), + patch_size::Dims{2} = (7, 7), activation = gelu, + inchannels::Integer = 3, nclasses::Integer = 1000) stem = conv_norm(patch_size, inchannels, planes, activation; preact = true, stride = patch_size[1]) blocks = [Chain(SkipConnection(Chain(conv_norm(kernel_size, planes, planes, activation; @@ -39,7 +41,7 @@ const CONVMIXER_CONFIGS = Dict(:base => Dict(:planes => 1536, :depth => 20, :patch_size => (7, 7))) """ - ConvMixer(mode::Symbol = :base; inchannels = 3, activation = gelu, nclasses = 1000) + ConvMixer(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvMixer model. ([reference](https://arxiv.org/abs/2201.09792)) @@ -48,7 +50,6 @@ Creates a ConvMixer model. - `mode`: the mode of the model, either `:base`, `:small` or `:large` - `inchannels`: The number of channels in the input. 
- - `activation`: activation function used after the convolutional layers - `nclasses`: number of classes in the output """ struct ConvMixer @@ -56,14 +57,13 @@ struct ConvMixer end @functor ConvMixer -function ConvMixer(mode::Symbol = :base; inchannels = 3, activation = gelu, nclasses = 1000) +function ConvMixer(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, keys(CONVMIXER_CONFIGS)) planes = CONVMIXER_CONFIGS[mode][:planes] depth = CONVMIXER_CONFIGS[mode][:depth] kernel_size = CONVMIXER_CONFIGS[mode][:kernel_size] patch_size = CONVMIXER_CONFIGS[mode][:patch_size] - layers = convmixer(planes, depth; inchannels, kernel_size, patch_size, activation, - nclasses) + layers = convmixer(planes, depth; inchannels, kernel_size, patch_size, nclasses) return ConvMixer(layers) end diff --git a/src/convnets/convnext.jl b/src/convnets/convnext.jl index e6ccee16a..d7c39cc04 100644 --- a/src/convnets/convnext.jl +++ b/src/convnets/convnext.jl @@ -1,5 +1,5 @@ """ - convnextblock(planes, drop_path_rate = 0., λ = 1f-6) + convnextblock(planes::Integer, drop_path_rate = 0.0, layerscale_init = 1.0f-6) Creates a single block of ConvNeXt. ([reference](https://arxiv.org/abs/2201.03545)) @@ -8,21 +8,23 @@ Creates a single block of ConvNeXt. - `planes`: number of input channels. - `drop_path_rate`: Stochastic depth rate. - - `λ`: Initial value for [`LayerScale`](#) + - `layerscale_init`: Initial value for [`LayerScale`](#) """ -function convnextblock(planes, drop_path_rate = 0.0, λ = 1.0f-6) +function convnextblock(planes::Integer, drop_path_rate = 0.0, layerscale_init = 1.0f-6) layers = SkipConnection(Chain(DepthwiseConv((7, 7), planes => planes; pad = 3), swapdims((3, 1, 2, 4)), LayerNorm(planes; ϵ = 1.0f-6), mlp_block(planes, 4 * planes), - LayerScale(planes, λ), + LayerScale(planes, layerscale_init), swapdims((2, 3, 1, 4)), DropPath(drop_path_rate)), +) return layers end """ - convnext(depths, planes; inchannels = 3, drop_path_rate = 0., λ = 1f-6, nclasses = 1000) + convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; + drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3, + nclasses::Integer = 1000) Creates the layers for a ConvNeXt model. ([reference](https://arxiv.org/abs/2201.03545)) @@ -33,12 +35,13 @@ Creates the layers for a ConvNeXt model. - `depths`: list with configuration for depth of each block - `planes`: list with configuration for number of output channels in each block - `drop_path_rate`: Stochastic depth rate. 
- - `λ`: Initial value for [`LayerScale`](#) + - `layerscale_init`: Initial value for [`LayerScale`](#) ([reference](https://arxiv.org/abs/2103.17239)) - `nclasses`: number of output classes """ -function convnext(depths, planes; inchannels = 3, drop_path_rate = 0.0, λ = 1.0f-6, - nclasses = 1000) +function convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; + drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3, + nclasses::Integer = 1000) @assert length(depths) == length(planes) "`planes` should have exactly one value for each block" downsample_layers = [] @@ -54,7 +57,9 @@ function convnext(depths, planes; inchannels = 3, drop_path_rate = 0.0, λ = 1.0 dp_rates = linear_scheduler(drop_path_rate; depth = sum(depths)) cur = 0 for i in eachindex(depths) - push!(stages, [convnextblock(planes[i], dp_rates[cur + j], λ) for j in 1:depths[i]]) + push!(stages, + [convnextblock(planes[i], dp_rates[cur + j], layerscale_init) + for j in 1:depths[i]]) cur += depths[i] end backbone = collect(Iterators.flatten(Iterators.flatten(zip(downsample_layers, stages)))) @@ -72,13 +77,8 @@ const CONVNEXT_CONFIGS = Dict(:tiny => ([3, 3, 9, 3], [96, 192, 384, 768]), :large => ([3, 3, 27, 3], [192, 384, 768, 1536]), :xlarge => ([3, 3, 27, 3], [256, 512, 1024, 2048])) -struct ConvNeXt - layers::Any -end -@functor ConvNeXt - """ - ConvNeXt(mode::Symbol = :base; inchannels = 3, drop_path_rate = 0., λ = 1f-6, nclasses = 1000) + ConvNeXt(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvNeXt model. ([reference](https://arxiv.org/abs/2201.03545)) @@ -86,16 +86,18 @@ Creates a ConvNeXt model. # Arguments - `inchannels`: The number of channels in the input. - - `drop_path_rate`: Stochastic depth rate. - - `λ`: Init value for [LayerScale](https://arxiv.org/abs/2103.17239) - `nclasses`: number of output classes See also [`Metalhead.convnext`](#). 
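A short illustration of the slimmed-down public interface (a sketch; `:tiny` is one of the `CONVNEXT_CONFIGS` keys listed above):

```julia
using Metalhead

m = ConvNeXt(:tiny; nclasses = 100)
```

With `drop_path_rate` and `layerscale_init` no longer exposed at this level, non-default values for them would be passed to `Metalhead.convnext` instead.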
""" -function ConvNeXt(mode::Symbol = :base; inchannels = 3, drop_path_rate = 0.0, λ = 1.0f-6, - nclasses = 1000) +struct ConvNeXt + layers::Any +end +@functor ConvNeXt + +function ConvNeXt(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, keys(CONVNEXT_CONFIGS)) - layers = convnext(CONVNEXT_CONFIGS[mode]...; inchannels, drop_path_rate, λ, nclasses) + layers = convnext(CONVNEXT_CONFIGS[mode]...; inchannels, nclasses) return ConvNeXt(layers) end diff --git a/src/convnets/densenet.jl b/src/convnets/densenet.jl index 332b5551f..0b164e2ab 100644 --- a/src/convnets/densenet.jl +++ b/src/convnets/densenet.jl @@ -10,7 +10,7 @@ Create a Densenet bottleneck layer - `outplanes`: number of output feature maps on bottleneck branch (and scaling factor for inner feature maps; see ref) """ -function dense_bottleneck(inplanes, outplanes) +function dense_bottleneck(inplanes::Integer, outplanes::Integer) inner_channels = 4 * outplanes return SkipConnection(Chain(conv_norm((1, 1), inplanes, inner_channels; bias = false, revnorm = true)..., @@ -30,7 +30,7 @@ Create a DenseNet transition sequence - `inplanes`: number of input feature maps - `outplanes`: number of output feature maps """ -function transition(inplanes, outplanes) +function transition(inplanes::Integer, outplanes::Integer) return Chain(conv_norm((1, 1), inplanes, outplanes; bias = false, revnorm = true)..., MeanPool((2, 2))) end @@ -48,14 +48,14 @@ the number of output feature maps by `growth_rates` with each block - `growth_rates`: the growth (additive) rates of output feature maps after each block (a vector of `k`s from the ref) """ -function dense_block(inplanes, growth_rates) +function dense_block(inplanes::Integer, growth_rates) return [dense_bottleneck(i, o) for (i, o) in zip(inplanes .+ cumsum([0, growth_rates[1:(end - 1)]...]), growth_rates)] end """ - densenet(inplanes, growth_rates; reduction = 0.5, nclasses = 1000) + densenet(inplanes, growth_rates; reduction = 0.5, nclasses::Integer = 1000) Create a DenseNet model ([reference](https://arxiv.org/abs/1608.06993)). @@ -68,9 +68,11 @@ Create a DenseNet model - `reduction`: the factor by which the number of feature maps is scaled across each transition - `nclasses`: the number of output classes """ -function densenet(inplanes, growth_rates; reduction = 0.5, nclasses = 1000) +function densenet(inplanes::Integer, growth_rates; reduction = 0.5, inchannels::Integer = 3, + nclasses::Integer = 1000) layers = [] - append!(layers, conv_norm((7, 7), 3, inplanes; stride = 2, pad = (3, 3), bias = false)) + append!(layers, + conv_norm((7, 7), inchannels, inplanes; stride = 2, pad = (3, 3), bias = false)) push!(layers, MaxPool((3, 3); stride = 2, pad = (1, 1))) outplanes = 0 for (i, rates) in enumerate(growth_rates) @@ -88,7 +90,7 @@ function densenet(inplanes, growth_rates; reduction = 0.5, nclasses = 1000) end """ - densenet(nblocks; growth_rate = 32, reduction = 0.5, nclasses = 1000) + densenet(nblocks; growth_rate = 32, reduction = 0.5, nclasses::Integer = 1000) Create a DenseNet model ([reference](https://arxiv.org/abs/1608.06993)). 
@@ -100,15 +102,15 @@ Create a DenseNet model - `reduction`: the factor by which the number of feature maps is scaled across each transition - `nclasses`: the number of output classes """ -function densenet(nblocks::NTuple{N, <:Integer}; growth_rate = 32, reduction = 0.5, - nclasses = 1000) where {N} +function densenet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, + inchannels::Integer = 3, nclasses::Integer = 1000) return densenet(2 * growth_rate, [fill(growth_rate, n) for n in nblocks]; - reduction = reduction, nclasses = nclasses) + reduction, inchannels, nclasses) end """ - DenseNet(nblocks::NTuple{N, <:Integer}; - growth_rate = 32, reduction = 0.5, nclasses = 1000) + DenseNet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, + inchannels = 3, nclasses::Integer = 1000) Create a DenseNet model ([reference](https://arxiv.org/abs/1608.06993)). @@ -124,29 +126,26 @@ See also [`densenet`](#). struct DenseNet layers::Any end +@functor DenseNet -function DenseNet(nblocks::NTuple{N, <:Integer}; - growth_rate = 32, reduction = 0.5, nclasses = 1000) where {N} - layers = densenet(nblocks; growth_rate = growth_rate, - reduction = reduction, - nclasses = nclasses) +function DenseNet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, + inchannels = 3, nclasses::Integer = 1000) + layers = densenet(nblocks; growth_rate, reduction, inchannels, nclasses) return DenseNet(layers) end -@functor DenseNet - (m::DenseNet)(x) = m.layers(x) backbone(m::DenseNet) = m.layers[1] classifier(m::DenseNet) = m.layers[2] -const DENSENET_CONFIGS = Dict(121 => (6, 12, 24, 16), - 161 => (6, 12, 36, 24), - 169 => (6, 12, 32, 32), - 201 => (6, 12, 48, 32)) +const DENSENET_CONFIGS = Dict(121 => [6, 12, 24, 16], + 161 => [6, 12, 36, 24], + 169 => [6, 12, 32, 32], + 201 => [6, 12, 48, 32]) """ - DenseNet(config::Integer = 121; pretrain = false, nclasses = 1000) + DenseNet(config::Integer = 121; pretrain::Bool = false, nclasses::Integer = 1000) DenseNet(transition_configs::NTuple{N,Integer}) Create a DenseNet model with specified configuration. Currently supported values are (121, 161, 169, 201) @@ -159,7 +158,7 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet. See also [`Metalhead.densenet`](#). """ -function DenseNet(config::Integer = 121; pretrain = false, nclasses = 1000) +function DenseNet(config::Integer = 121; pretrain::Bool = false, nclasses::Integer = 1000) _checkconfig(config, keys(DENSENET_CONFIGS)) model = DenseNet(DENSENET_CONFIGS[config]; nclasses = nclasses) if pretrain diff --git a/src/convnets/efficientnet.jl b/src/convnets/efficientnet.jl index 71e6f8f0a..730840fa4 100644 --- a/src/convnets/efficientnet.jl +++ b/src/convnets/efficientnet.jl @@ -1,6 +1,6 @@ """ - efficientnet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) + efficientnet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). @@ -22,8 +22,8 @@ Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). 
- `max_width`: maximum number of output channels before the fully connected classification blocks """ -function efficientnet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) +function efficientnet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) wscale, dscale = scalings scalew(w) = wscale ≈ 1 ? w : ceil(Int64, wscale * w) scaled(d) = dscale ≈ 1 ? d : ceil(Int64, dscale * d) @@ -36,12 +36,11 @@ function efficientnet(scalings, block_configs; out_channels = _round_channels(scalew(o), 8) repeats = scaled(n) push!(blocks, - invertedresidual((k, k), in_channels, in_channels * e, out_channels, swish; + invertedresidual((k, k), in_channels, out_channels, swish; expansion = e, stride = s, reduction = 4)) for _ in 1:(repeats - 1) push!(blocks, - invertedresidual((k, k), out_channels, out_channels * e, out_channels, - swish; + invertedresidual((k, k), out_channels, out_channels, swish; expansion = e, stride = 1, reduction = 4)) end end @@ -74,6 +73,7 @@ const EFFICIENTNET_BLOCK_CONFIGS = [ # w: width scaling # d: depth scaling # r: image resolution +# Data is organised as (r, (w, d)) const EFFICIENTNET_GLOBAL_CONFIGS = Dict(:b0 => (224, (1.0, 1.0)), :b1 => (240, (1.0, 1.1)), :b2 => (260, (1.1, 1.2)), @@ -84,14 +84,9 @@ const EFFICIENTNET_GLOBAL_CONFIGS = Dict(:b0 => (224, (1.0, 1.0)), :b7 => (600, (2.0, 3.1)), :b8 => (672, (2.2, 3.6))) -struct EfficientNet - layers::Any -end -@functor EfficientNet - """ - EfficientNet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) + EfficientNet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). See also [`efficientnet`](#). @@ -114,8 +109,13 @@ See also [`efficientnet`](#). - `max_width`: maximum number of output channels before the fully connected classification blocks """ -function EfficientNet(scalings, block_configs; - inchannels = 3, nclasses = 1000, max_width = 1280) +struct EfficientNet + layers::Any +end +@functor EfficientNet + +function EfficientNet(scalings, block_configs; max_width::Integer = 1280, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = efficientnet(scalings, block_configs; inchannels, nclasses, max_width) return EfficientNet(layers) end @@ -126,7 +126,7 @@ backbone(m::EfficientNet) = m.layers[1] classifier(m::EfficientNet) = m.layers[2] """ - EfficientNet(name::Symbol; pretrain = false) + EfficientNet(name::Symbol; pretrain::Bool = false) Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). See also [`efficientnet`](#). @@ -137,7 +137,7 @@ See also [`efficientnet`](#). 
(can be `:b0`, `:b1`, `:b2`, `:b3`, `:b4`, `:b5`, `:b6`, `:b7`, `:b8`) - `pretrain`: set to `true` to load the pre-trained weights for ImageNet """ -function EfficientNet(name::Symbol; pretrain = false) +function EfficientNet(name::Symbol; pretrain::Bool = false) _checkconfig(name, keys(EFFICIENTNET_GLOBAL_CONFIGS)) model = EfficientNet(EFFICIENTNET_GLOBAL_CONFIGS[name][2], EFFICIENTNET_BLOCK_CONFIGS) pretrain && loadpretrain!(model, string("efficientnet-", name)) diff --git a/src/convnets/inception/googlenet.jl b/src/convnets/inception/googlenet.jl index 8a88ca943..90f92ddfc 100644 --- a/src/convnets/inception/googlenet.jl +++ b/src/convnets/inception/googlenet.jl @@ -27,7 +27,7 @@ function _inceptionblock(inplanes, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, end """ - googlenet(; nclasses = 1000) + googlenet(; nclasses::Integer = 1000) Create an Inception-v1 model (commonly referred to as GoogLeNet) ([reference](https://arxiv.org/abs/1409.4842v1)). @@ -36,7 +36,7 @@ Create an Inception-v1 model (commonly referred to as GoogLeNet) - `nclasses`: the number of output classes """ -function googlenet(; nclasses = 1000) +function googlenet(; nclasses::Integer = 1000) layers = Chain(Chain(Conv((7, 7), 3 => 64; stride = 2, pad = 3), MaxPool((3, 3); stride = 2, pad = 1), Conv((1, 1), 64 => 64), @@ -61,7 +61,7 @@ function googlenet(; nclasses = 1000) end """ - GoogLeNet(; pretrain = false, nclasses = 1000) + GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000) Create an Inception-v1 model (commonly referred to as `GoogLeNet`) ([reference](https://arxiv.org/abs/1409.4842v1)). @@ -82,7 +82,7 @@ struct GoogLeNet end @functor GoogLeNet -function GoogLeNet(; pretrain = false, nclasses = 1000) +function GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000) layers = googlenet(; nclasses = nclasses) if pretrain loadpretrain!(layers, "GoogLeNet") diff --git a/src/convnets/inception/inceptionresnetv2.jl b/src/convnets/inception/inceptionresnetv2.jl index 4b4b78706..747da2fb2 100644 --- a/src/convnets/inception/inceptionresnetv2.jl +++ b/src/convnets/inception/inceptionresnetv2.jl @@ -64,7 +64,7 @@ function block8(scale = 1.0f0; activation = identity) end """ - inceptionresnetv2(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Creates an InceptionResNetv2 model. ([reference](https://arxiv.org/abs/1602.07261)) @@ -75,7 +75,8 @@ Creates an InceptionResNetv2 model. - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. """ -function inceptionresnetv2(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) +function inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0, + nclasses::Integer = 1000) body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., conv_norm((3, 3), 32, 32)..., conv_norm((3, 3), 32, 64; pad = 1)..., @@ -97,7 +98,7 @@ function inceptionresnetv2(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000 end """ - InceptionResNetv2(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + InceptionResNetv2(; pretrain::Bool = false, inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Creates an InceptionResNetv2 model. 
([reference](https://arxiv.org/abs/1602.07261)) @@ -118,8 +119,9 @@ struct InceptionResNetv2 end @functor InceptionResNetv2 -function InceptionResNetv2(; pretrain = false, inchannels = 3, dropout_rate = 0.0, - nclasses = 1000) +function InceptionResNetv2(; pretrain::Bool = false, inchannels::Integer = 3, + dropout_rate = 0.0, + nclasses::Integer = 1000) layers = inceptionresnetv2(; inchannels, dropout_rate, nclasses) if pretrain loadpretrain!(layers, "InceptionResNetv2") diff --git a/src/convnets/inception/inceptionv3.jl b/src/convnets/inception/inceptionv3.jl index 68b283838..8d9977d80 100644 --- a/src/convnets/inception/inceptionv3.jl +++ b/src/convnets/inception/inceptionv3.jl @@ -127,7 +127,7 @@ function inceptionv3_e(inplanes) end """ - inceptionv3(; nclasses = 1000) + inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000) Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). @@ -135,8 +135,8 @@ Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). - `nclasses`: the number of output classes """ -function inceptionv3(; nclasses = 1000) - layer = Chain(Chain(conv_norm((3, 3), 3, 32; stride = 2)..., +function inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000) + layer = Chain(Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., conv_norm((3, 3), 32, 32)..., conv_norm((3, 3), 32, 64; pad = 1)..., MaxPool((3, 3); stride = 2), @@ -162,7 +162,7 @@ function inceptionv3(; nclasses = 1000) end """ - Inceptionv3(; pretrain = false, nclasses = 1000) + Inceptionv3(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). See also [`inceptionv3`](#). @@ -170,6 +170,7 @@ See also [`inceptionv3`](#). # Arguments - `pretrain`: set to `true` to load the pre-trained weights for ImageNet + - `inchannels`: number of input channels - `nclasses`: the number of output classes !!! warning @@ -180,8 +181,9 @@ struct Inceptionv3 layers::Any end -function Inceptionv3(; pretrain = false, nclasses = 1000) - layers = inceptionv3(; nclasses = nclasses) +function Inceptionv3(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = inceptionv3(; inchannels, nclasses) if pretrain loadpretrain!(layers, "Inceptionv3") end diff --git a/src/convnets/inception/inceptionv4.jl b/src/convnets/inception/inceptionv4.jl index bb03646ec..b84232fb8 100644 --- a/src/convnets/inception/inceptionv4.jl +++ b/src/convnets/inception/inceptionv4.jl @@ -82,7 +82,7 @@ function inceptionv4_c() end """ - inceptionv4(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + inceptionv4(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Create an Inceptionv4 model. ([reference](https://arxiv.org/abs/1602.07261)) @@ -93,7 +93,8 @@ Create an Inceptionv4 model. - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. 
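A hedged sketch of the split this change creates between the exported type and the lower-level builder (the `Metalhead.`-qualified call assumes the builder stays unexported, as the module-qualified cross-references elsewhere in these docstrings suggest):

```julia
using Metalhead

m = Inceptionv4(; nclasses = 100)                     # exported wrapper, no dropout_rate kwarg
layers = Metalhead.inceptionv4(; dropout_rate = 0.2)  # the builder keeps the classifier dropout knob
```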
""" -function inceptionv4(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) +function inceptionv4(; dropout_rate = 0.0, inchannels::Integer = 3, + nclasses::Integer = 1000) body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., conv_norm((3, 3), 32, 32)..., conv_norm((3, 3), 32, 64; pad = 1)..., @@ -122,7 +123,7 @@ function inceptionv4(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) end """ - Inceptionv4(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + Inceptionv4(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates an Inceptionv4 model. ([reference](https://arxiv.org/abs/1602.07261)) @@ -131,7 +132,6 @@ Creates an Inceptionv4 model. - `pretrain`: set to `true` to load the pre-trained weights for ImageNet - `inchannels`: number of input channels. - - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. !!! warning @@ -143,9 +143,9 @@ struct Inceptionv4 end @functor Inceptionv4 -function Inceptionv4(; pretrain = false, inchannels = 3, dropout_rate = 0.0, - nclasses = 1000) - layers = inceptionv4(; inchannels, dropout_rate, nclasses) +function Inceptionv4(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = inceptionv4(; inchannels, nclasses) if pretrain loadpretrain!(layers, "Inceptionv4") end diff --git a/src/convnets/inception/xception.jl b/src/convnets/inception/xception.jl index 3c6d8331a..8d2ad13d8 100644 --- a/src/convnets/inception/xception.jl +++ b/src/convnets/inception/xception.jl @@ -1,6 +1,7 @@ """ - xception_block(inchannels, outchannels, nrepeats; stride = 1, start_with_relu = true, - grow_at_start = true) + xception_block(inchannels::Integer, outchannels::Integer, nrepeats::Integer; + stride::Integer = 1, start_with_relu::Bool = true, + grow_at_start::Bool = true) Create an Xception block. ([reference](https://arxiv.org/abs/1610.02357)) @@ -14,9 +15,9 @@ Create an Xception block. - `start_with_relu`: if true, start the block with a ReLU activation. - `grow_at_start`: if true, increase the number of channels at the first convolution. """ -function xception_block(inchannels, outchannels, nrepeats; stride = 1, - start_with_relu = true, - grow_at_start = true) +function xception_block(inchannels::Integer, outchannels::Integer, nrepeats::Integer; + stride::Integer = 1, start_with_relu::Bool = true, + grow_at_start::Bool = true) if outchannels != inchannels || stride != 1 skip = conv_norm((1, 1), inchannels, outchannels, identity; stride = stride, bias = false) @@ -44,7 +45,7 @@ function xception_block(inchannels, outchannels, nrepeats; stride = 1, end """ - xception(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + xception(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) Creates an Xception model. ([reference](https://arxiv.org/abs/1610.02357)) @@ -55,7 +56,7 @@ Creates an Xception model. - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. 
""" -function xception(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) +function xception(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2, bias = false)..., conv_norm((3, 3), 32, 64; bias = false)..., xception_block(64, 128, 2; stride = 2, start_with_relu = false), @@ -70,13 +71,8 @@ function xception(; inchannels = 3, dropout_rate = 0.0, nclasses = 1000) return Chain(body, head) end -struct Xception - layers::Any -end -@functor Xception - """ - Xception(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) + Xception(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates an Xception model. ([reference](https://arxiv.org/abs/1610.02357)) @@ -85,15 +81,20 @@ Creates an Xception model. - `pretrain`: set to `true` to load the pre-trained weights for ImageNet. - `inchannels`: number of input channels. - - `dropout_rate`: rate of dropout in classifier head. - `nclasses`: the number of output classes. !!! warning `Xception` does not currently support pretrained weights. """ -function Xception(; pretrain = false, inchannels = 3, dropout_rate = 0.0, nclasses = 1000) - layers = xception(; inchannels, dropout_rate, nclasses) +struct Xception + layers::Any +end +@functor Xception + +function Xception(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = xception(; inchannels, nclasses) if pretrain loadpretrain!(layers, "xception") end diff --git a/src/convnets/mobilenet/mobilenetv1.jl b/src/convnets/mobilenet/mobilenetv1.jl index fffa93a4d..e31f8835b 100644 --- a/src/convnets/mobilenet/mobilenetv1.jl +++ b/src/convnets/mobilenet/mobilenetv1.jl @@ -1,8 +1,6 @@ """ - mobilenetv1(width_mult, config; - activation = relu, - inchannels = 3, - nclasses = 1000) + mobilenetv1(width_mult::Number, config::Vector{<:Tuple}; activation = relu, + inchannels::Integer = 3, nclasses::Integer = 1000) Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)). @@ -21,10 +19,8 @@ Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)). - `inchannels`: The number of input channels. The default value is 3. - `nclasses`: The number of output classes """ -function mobilenetv1(width_mult, config; - activation = relu, - inchannels = 3, - nclasses = 1000) +function mobilenetv1(width_mult::Number, config::Vector{<:Tuple}; activation = relu, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = [] for (dw, outch, stride, nrepeats) in config outch = Int(outch * width_mult) @@ -61,7 +57,8 @@ const MOBILENETV1_CONFIGS = [ ] """ - MobileNetv1(width_mult = 1; inchannels = 3, pretrain = false, nclasses = 1000) + MobileNetv1(width_mult = 1; inchannels::Integer = 3, pretrain::Bool = false, + nclasses::Integer = 1000) Create a MobileNetv1 model with the baseline configuration ([reference](https://arxiv.org/abs/1704.04861v1)). 
@@ -83,8 +80,8 @@ struct MobileNetv1 end @functor MobileNetv1 -function MobileNetv1(width_mult::Number = 1; inchannels = 3, pretrain = false, - nclasses = 1000) +function MobileNetv1(width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = mobilenetv1(width_mult, MOBILENETV1_CONFIGS; inchannels, nclasses) if pretrain loadpretrain!(layers, string("MobileNetv1")) diff --git a/src/convnets/mobilenet/mobilenetv2.jl b/src/convnets/mobilenet/mobilenetv2.jl index b97fc16ff..9dd35e9f9 100644 --- a/src/convnets/mobilenet/mobilenetv2.jl +++ b/src/convnets/mobilenet/mobilenetv2.jl @@ -1,5 +1,7 @@ """ - mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000) + mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1280, inchannels::Integer = 3, + nclasses::Integer = 1000) Create a MobileNetv2 model. ([reference](https://arxiv.org/abs/1801.04381)). @@ -20,7 +22,9 @@ Create a MobileNetv2 model. - `max_width`: The maximum number of feature maps in any layer of the network - `nclasses`: The number of output classes """ -function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000) +function mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1280, inchannels::Integer = 3, + nclasses::Integer = 1000) # building first layer inplanes = _round_channels(32 * width_mult, width_mult == 0.1 ? 4 : 8) layers = [] @@ -30,7 +34,7 @@ function mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, ncla outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8) for i in 1:n push!(layers, - invertedresidual((3, 3), inplanes, inplanes * t, outplanes, a; + invertedresidual((3, 3), inplanes, outplanes, a; expansion = t, stride = i == 1 ? s : 1)) inplanes = outplanes end @@ -57,13 +61,9 @@ const MOBILENETV2_CONFIGS = [ (6, 320, 1, 1, relu6), ] -struct MobileNetv2 - layers::Any -end -@functor MobileNetv2 - """ - MobileNetv2(width_mult = 1.0; inchannels = 3, pretrain = false, nclasses = 1000) + MobileNetv2(width_mult = 1.0; inchannels::Integer = 3, pretrain::Bool = false, + nclasses::Integer = 1000) Create a MobileNetv2 model with the specified configuration. ([reference](https://arxiv.org/abs/1801.04381)). @@ -74,14 +74,19 @@ Set `pretrain` to `true` to load the pretrained weights for ImageNet. - `width_mult`: Controls the number of output feature maps in each block (with 1.0 being the default in the paper; this is usually a value between 0.1 and 1.4) - - `inchannels`: The number of input channels. - `pretrain`: Whether to load the pre-trained weights for ImageNet + - `inchannels`: The number of input channels. - `nclasses`: The number of output classes See also [`Metalhead.mobilenetv2`](#). 
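For completeness, an illustrative call with a non-default width multiplier (a sketch; 1.4 sits at the top of the 0.1–1.4 range mentioned above):

```julia
using Metalhead

m = MobileNetv2(1.4; nclasses = 10)
```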
""" -function MobileNetv2(width_mult::Number = 1; inchannels = 3, pretrain = false, - nclasses = 1000) +struct MobileNetv2 + layers::Any +end +@functor MobileNetv2 + +function MobileNetv2(width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) layers = mobilenetv2(width_mult, MOBILENETV2_CONFIGS; inchannels, nclasses) pretrain && loadpretrain!(layers, string("MobileNetv2")) if pretrain diff --git a/src/convnets/mobilenet/mobilenetv3.jl b/src/convnets/mobilenet/mobilenetv3.jl index d6873ac57..00c0e0139 100644 --- a/src/convnets/mobilenet/mobilenetv3.jl +++ b/src/convnets/mobilenet/mobilenetv3.jl @@ -1,5 +1,7 @@ """ - mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000) + mobilenetv3(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1024, inchannels::Integer = 3, + nclasses::Integer = 1000) Create a MobileNetv3 model. ([reference](https://arxiv.org/abs/1905.02244)). @@ -22,7 +24,9 @@ Create a MobileNetv3 model. - `max_width`: The maximum number of feature maps in any layer of the network - `nclasses`: the number of output classes """ -function mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000) +function mobilenetv3(width_mult::Number, configs::Vector{<:Tuple}; + max_width::Integer = 1024, inchannels::Integer = 3, + nclasses::Integer = 1000) # building first layer inplanes = _round_channels(16 * width_mult, 8) layers = [] @@ -86,13 +90,9 @@ const MOBILENETV3_CONFIGS = Dict(:small => [ (5, 6, 160, 4, hardswish, 1), ]) -struct MobileNetv3 - layers::Any -end -@functor MobileNetv3 - """ - MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3, pretrain = false, nclasses = 1000) + MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) Create a MobileNetv3 model with the specified configuration. ([reference](https://arxiv.org/abs/1905.02244)). @@ -104,15 +104,20 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet. - `width_mult`: Controls the number of output feature maps in each block (with 1.0 being the default in the paper; this is usually a value between 0.1 and 1.4) - - `inchannels`: The number of channels in the input. - `pretrain`: whether to load the pre-trained weights for ImageNet + - `inchannels`: The number of channels in the input. - `nclasses`: the number of output classes See also [`Metalhead.mobilenetv3`](#). """ -function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3, - pretrain = false, nclasses = 1000) - @assert mode in [:large, :small] "`mode` has to be either :large or :small" +struct MobileNetv3 + layers::Any +end +@functor MobileNetv3 + +function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) + _checkconfig(mode, [:small, :large]) max_width = (mode == :large) ? 
1280 : 1024 layers = mobilenetv3(width_mult, MOBILENETV3_CONFIGS[mode]; inchannels, max_width, nclasses) diff --git a/src/convnets/resnets/core.jl b/src/convnets/resnets/core.jl index 329663c13..940565f3a 100644 --- a/src/convnets/resnets/core.jl +++ b/src/convnets/resnets/core.jl @@ -132,7 +132,7 @@ end # end """ - resnet_stem(; stem_type = :default, inchannels = 3, replace_stem_pool = false, + resnet_stem(; stem_type = :default, inchannels::Integer = 3, replace_stem_pool = false, norm_layer = BatchNorm, activation = relu) Builds a stem to be used in a ResNet model. See the `stem` argument of [`resnet`](#) for details diff --git a/src/convnets/resnets/resnet.jl b/src/convnets/resnets/resnet.jl index fac7e7415..9bf9cd82c 100644 --- a/src/convnets/resnets/resnet.jl +++ b/src/convnets/resnets/resnet.jl @@ -1,5 +1,5 @@ """ - ResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) + ResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ResNet model with the specified depth. ((reference)[https://arxiv.org/abs/1512.03385]) @@ -22,7 +22,8 @@ struct ResNet end @functor ResNet -function ResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) +function ResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) _checkconfig(depth, keys(RESNET_CONFIGS)) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses) if pretrain @@ -37,7 +38,7 @@ backbone(m::ResNet) = m.layers[1] classifier(m::ResNet) = m.layers[2] """ - WideResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) + WideResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a Wide ResNet model with the specified depth. The model is the same as ResNet except for the bottleneck number of channels which is twice larger in every block. @@ -62,7 +63,8 @@ struct WideResNet end @functor WideResNet -function WideResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) +function WideResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) _checkconfig(depth, [50, 101]) layers = resnet(RESNET_CONFIGS[depth]...; base_width = 128, inchannels, nclasses) if pretrain diff --git a/src/convnets/resnets/resnext.jl b/src/convnets/resnets/resnext.jl index 8032df5ab..29d89e3f1 100644 --- a/src/convnets/resnets/resnext.jl +++ b/src/convnets/resnets/resnext.jl @@ -1,6 +1,6 @@ """ - ResNeXt(depth::Integer; pretrain = false, cardinality = 32, - base_width = 4, inchannels = 3, nclasses = 1000) + ResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, + base_width = 4, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ResNeXt model with the specified depth, cardinality, and base width. 
((reference)[https://arxiv.org/abs/1611.05431]) @@ -27,8 +27,8 @@ end (m::ResNeXt)(x) = m.layers(x) -function ResNeXt(depth::Integer; pretrain = false, cardinality = 32, - base_width = 4, inchannels = 3, nclasses = 1000) +function ResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, + base_width = 4, inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(depth, sort(collect(keys(RESNET_CONFIGS)))[3:end]) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses, cardinality, base_width) if pretrain diff --git a/src/convnets/resnets/seresnet.jl b/src/convnets/resnets/seresnet.jl index 05d842173..61eee3aad 100644 --- a/src/convnets/resnets/seresnet.jl +++ b/src/convnets/resnets/seresnet.jl @@ -1,5 +1,5 @@ """ - SEResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) + SEResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Creates a SEResNet model with the specified depth. ((reference)[https://arxiv.org/pdf/1709.01507.pdf]) @@ -24,7 +24,8 @@ end (m::SEResNet)(x) = m.layers(x) -function SEResNet(depth::Integer; pretrain = false, inchannels = 3, nclasses = 1000) +function SEResNet(depth::Integer; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) _checkconfig(depth, keys(RESNET_CONFIGS)) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses, attn_fn = squeeze_excite) @@ -38,8 +39,8 @@ backbone(m::SEResNet) = m.layers[1] classifier(m::SEResNet) = m.layers[2] """ - SEResNeXt(depth::Integer; pretrain = false, cardinality = 32, base_width = 4, - inchannels = 3, nclasses = 1000) + SEResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, base_width = 4, + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a SEResNeXt model with the specified depth, cardinality, and base width. ((reference)[https://arxiv.org/pdf/1709.01507.pdf]) @@ -66,8 +67,8 @@ end (m::SEResNeXt)(x) = m.layers(x) -function SEResNeXt(depth::Integer; pretrain = false, cardinality = 32, base_width = 4, - inchannels = 3, nclasses = 1000) +function SEResNeXt(depth::Integer; pretrain::Bool = false, cardinality = 32, base_width = 4, + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(depth, sort(collect(keys(RESNET_CONFIGS)))[3:end]) layers = resnet(RESNET_CONFIGS[depth]...; inchannels, nclasses, cardinality, base_width, attn_fn = squeeze_excite) diff --git a/src/convnets/squeezenet.jl b/src/convnets/squeezenet.jl index abcdd63f8..3ee6653bc 100644 --- a/src/convnets/squeezenet.jl +++ b/src/convnets/squeezenet.jl @@ -1,5 +1,6 @@ """ - fire(inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes) + fire(inplanes::Integer, squeeze_planes::Integer, expand1x1_planes::Integer, + expand3x3_planes::Integer) Create a fire module ([reference](https://arxiv.org/abs/1602.07360v4)). 
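A quick sanity check of the channel bookkeeping (illustrative values only): the two expand
branches are concatenated along the channel dimension, so their widths add up while the
spatial size is preserved; `fire` is a non-exported helper.

    m = Metalhead.fire(64, 16, 64, 64)   # squeeze 64 -> 16, then expand to 64 + 64
    x = rand(Float32, 56, 56, 64, 1)
    size(m(x))                           # expected (56, 56, 128, 1)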
@@ -11,7 +12,8 @@ Create a fire module - `expand1x1_planes`: number of output feature maps for the 1x1 expansion convolution - `expand3x3_planes`: number of output feature maps for the 3x3 expansion convolution """ -function fire(inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes) +function fire(inplanes::Integer, squeeze_planes::Integer, expand1x1_planes::Integer, + expand3x3_planes::Integer) branch_1 = Conv((1, 1), inplanes => squeeze_planes, relu) branch_2 = Conv((1, 1), squeeze_planes => expand1x1_planes, relu) branch_3 = Conv((3, 3), squeeze_planes => expand3x3_planes, relu; pad = 1) @@ -19,13 +21,18 @@ function fire(inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes) end """ - squeezenet() + squeezenet(; inchannels::Integer = 3, nclasses::Integer = 1000) Create a SqueezeNet ([reference](https://arxiv.org/abs/1602.07360v4)). + +# Arguments + + - `inchannels`: number of input channels. + - `nclasses`: the number of output classes. """ -function squeezenet() - return Chain(Chain(Conv((3, 3), 3 => 64, relu; stride = 2), +function squeezenet(; inchannels::Integer = 3, nclasses::Integer = 1000) + return Chain(Chain(Conv((3, 3), inchannels => 64, relu; stride = 2), MaxPool((3, 3); stride = 2), fire(64, 16, 64, 64), fire(128, 16, 64, 64), @@ -38,17 +45,23 @@ function squeezenet() fire(384, 64, 256, 256), fire(512, 64, 256, 256), Dropout(0.5), - Conv((1, 1), 512 => 1000, relu)), + Conv((1, 1), 512 => nclasses, relu)), AdaptiveMeanPool((1, 1)), MLUtils.flatten) end """ - SqueezeNet(; pretrain = false) + SqueezeNet(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) Create a SqueezeNet ([reference](https://arxiv.org/abs/1602.07360v4)). -Set `pretrain=true` to load the model with pre-trained weights for ImageNet. + +# Arguments + + - `pretrain`: set to `true` to load the pre-trained weights for ImageNet + - `inchannels`: number of input channels. + - `nclasses`: the number of output classes. !!! 
warning @@ -61,8 +74,9 @@ struct SqueezeNet end @functor SqueezeNet -function SqueezeNet(; pretrain = false) - layers = squeezenet() +function SqueezeNet(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = squeezenet(; inchannels, nclasses) if pretrain loadpretrain!(layers, "SqueezeNet") end diff --git a/src/convnets/vgg.jl b/src/convnets/vgg.jl index ccfdd2cff..0b6026eb8 100644 --- a/src/convnets/vgg.jl +++ b/src/convnets/vgg.jl @@ -11,7 +11,7 @@ A VGG block of convolution layers - `depth`: number of convolution/convolution + batch norm layers - `batchnorm`: set to `true` to include batch normalization after each convolution """ -function vgg_block(ifilters, ofilters, depth, batchnorm) +function vgg_block(ifilters::Integer, ofilters::Integer, depth::Integer, batchnorm::Bool) k = (3, 3) p = (1, 1) layers = [] @@ -40,7 +40,8 @@ Create VGG convolution layers - `batchnorm`: set to `true` to include batch normalization after each convolution - `inchannels`: number of input channels """ -function vgg_convolutional_layers(config, batchnorm, inchannels) +function vgg_convolutional_layers(config::Vector{<:Tuple}, batchnorm::Bool, + inchannels::Integer) layers = [] ifilters = inchannels for c in config @@ -65,7 +66,8 @@ Create VGG classifier (fully connected) layers - `fcsize`: input and output size of the intermediate fully connected layer - `dropout_rate`: the dropout level between each fully connected layer """ -function vgg_classifier_layers(imsize, nclasses, fcsize, dropout_rate) +function vgg_classifier_layers(imsize::NTuple{3, <:Integer}, nclasses::Integer, + fcsize::Integer, dropout_rate) return Chain(MLUtils.flatten, Dense(Int(prod(imsize)), fcsize, relu), Dropout(dropout_rate), @@ -92,7 +94,8 @@ Create a VGG model (see [`Metalhead.vgg_classifier_layers`](#)) - `dropout_rate`: dropout level between fully connected layers """ -function vgg(imsize; config, inchannels, batchnorm = false, nclasses, fcsize, dropout_rate) +function vgg(imsize::Dims{2}; config, batchnorm::Bool = false, fcsize::Integer = 4096, + dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) conv = vgg_convolutional_layers(config, batchnorm, inchannels) imsize = outputsize(conv, (imsize..., inchannels); padbatch = true)[1:3] class = vgg_classifier_layers(imsize, nclasses, fcsize, dropout_rate) @@ -109,10 +112,6 @@ const VGG_CONFIGS = Dict(11 => :A, 16 => :D, 19 => :E) -struct VGG - layers::Any -end - """ VGG(imsize::Dims{2}; config, inchannels, batchnorm = false, nclasses, fcsize, dropout_rate) @@ -120,46 +119,53 @@ Construct a VGG model with the specified input image size. Typically, the image ## Keyword Arguments: - - `config` : VGG convolutional block configuration. It is defined as a vector of tuples `(output_channels, num_convolutions)` for each block - - `inchannels`::Integer : number of input channels - - `batchnorm`::Bool : set to `true` to use batch normalization after each convolution - - `nclasses`::Integer : number of output classes + - `config` : VGG convolutional block configuration. 
It is defined as a vector of tuples + `(output_channels, num_convolutions)` for each block + - `inchannels`: number of input channels + - `batchnorm`: set to `true` to use batch normalization after each convolution + - `nclasses`: number of output classes - `fcsize`: intermediate fully connected layer size (see [`Metalhead.vgg_classifier_layers`](#)) - `dropout_rate`: dropout level between fully connected layers """ -function VGG(imsize::Dims{2}; config, inchannels, batchnorm = false, nclasses, fcsize, - dropout_rate) - layers = vgg(imsize; config, inchannels, batchnorm, nclasses, fcsize, dropout_rate) - return VGG(layers) +struct VGG + layers::Any end - @functor VGG +function VGG(imsize::Dims{2}; config, batchnorm::Bool = false, dropout_rate = 0.5, + inchannels::Integer = 3, nclasses::Integer = 1000) + layers = vgg(imsize; config, inchannels, batchnorm, nclasses, dropout_rate) + return VGG(layers) +end + (m::VGG)(x) = m.layers(x) backbone(m::VGG) = m.layers[1] classifier(m::VGG) = m.layers[2] """ - VGG(depth::Integer = 16; pretrain = false, batchnorm = false) + VGG(depth::Integer; pretrain::Bool = false, batchnorm::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) -Create a VGG style model with specified `depth`. Available values include (11, 13, 16, 19). +Create a VGG style model with specified `depth`. ([reference](https://arxiv.org/abs/1409.1556v6)). -See also [`VGG`](#). # Arguments + - `depth`: the depth of the VGG model. Must be one of [11, 13, 16, 19]. - `pretrain`: set to `true` to load pre-trained model weights for ImageNet + - `batchnorm`: set to `true` to use batch normalization after each convolution + - `inchannels`: number of input channels + - `nclasses`: number of output classes + +See also [`vgg`](#). """ -function VGG(depth::Integer = 16; pretrain = false, batchnorm = false, nclasses = 1000) +function VGG(depth::Integer; pretrain::Bool = false, batchnorm::Bool = false, + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(depth, keys(VGG_CONFIGS)) - model = VGG((224, 224); config = VGG_CONV_CONFIGS[VGG_CONFIGS[depth]], - inchannels = 3, - batchnorm = batchnorm, - nclasses = nclasses, - fcsize = 4096, - dropout_rate = 0.5) + model = VGG((224, 224); config = VGG_CONV_CONFIGS[VGG_CONFIGS[depth]], batchnorm, + inchannels, nclasses) if pretrain && !batchnorm loadpretrain!(model, string("vgg", depth)) elseif pretrain diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 557db23a7..75b40708c 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -23,9 +23,9 @@ Create a convolution + batch normalization pair with activation. - `groups`: groups for the convolution kernel - `bias`, `weight`, `init`: initialization for the convolution kernel (see [`Flux.Conv`](#)) """ -function conv_norm(kernel_size, inplanes::Int, outplanes::Int, activation = relu; - norm_layer = BatchNorm, revnorm = false, preact = false, use_norm = true, - kwargs...) +function conv_norm(kernel_size, inplanes::Integer, outplanes::Integer, activation = relu; + norm_layer = BatchNorm, revnorm::Bool = false, preact::Bool = false, + use_norm::Bool = true, kwargs...) 
if !use_norm if (preact || revnorm) throw(ArgumentError("`preact` only supported with `use_norm = true`")) @@ -60,8 +60,8 @@ end """ depthwise_sep_conv_norm(kernel_size, inplanes, outplanes, activation = relu; - revnorm = false, use_norm = (true, true), - stride = 1, pad = 0, dilation = 1, [bias, weight, init]) + revnorm = false, use_norm = (true, true), + stride = 1, pad = 0, dilation = 1, [bias, weight, init]) Create a depthwise separable convolution chain as used in MobileNetv1. This is sequence of layers: @@ -86,9 +86,11 @@ See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1). - `dilation`: dilation of the first convolution kernel - `bias`, `weight`, `init`: initialization for the convolution kernel (see [`Flux.Conv`](#)) """ -function depthwise_sep_conv_norm(kernel_size, inplanes, outplanes, activation = relu; - norm_layer = BatchNorm, revnorm = false, - use_norm = (true, true), stride = 1, kwargs...) +function depthwise_sep_conv_norm(kernel_size, inplanes::Integer, outplanes::Integer, + activation = relu; norm_layer = BatchNorm, + revnorm::Bool = false, + use_norm::NTuple{2, Bool} = (true, true), + stride::Integer = 1, kwargs...) return vcat(conv_norm(kernel_size, inplanes, inplanes, activation; norm_layer, revnorm, use_norm = use_norm[1], stride, groups = inplanes, kwargs...), diff --git a/src/layers/drop.jl b/src/layers/drop.jl index b4a882cff..f823d5c22 100644 --- a/src/layers/drop.jl +++ b/src/layers/drop.jl @@ -1,5 +1,6 @@ # Generates the mask to be used for `DropBlock` -@inline function _dropblock_mask(rng, x, gamma, clipped_block_size) +@inline function _dropblock_mask(rng, x::AbstractArray{T, 4}, gamma, + clipped_block_size::Integer) where {T} block_mask = rand_like(rng, x) block_mask .= block_mask .< gamma return 1 .- maxpool(block_mask, (clipped_block_size, clipped_block_size); @@ -28,8 +29,8 @@ If you are an end-user, you do not want this function. Use [`DropBlock`](#) inst """ # TODO add experimental `DropBlock` options from timm such as gaussian noise and # more precise `DropBlock` to deal with edges (#188) -function dropblock(rng::AbstractRNG, x::AbstractArray{T, 4}, drop_block_prob, block_size, - gamma_scale) where {T} +function dropblock(rng::AbstractRNG, x::AbstractArray{T, 4}, drop_block_prob, + block_size::Integer, gamma_scale) where {T} H, W, _, _ = size(x) total_size = H * W clipped_block_size = min(block_size, min(H, W)) @@ -100,7 +101,7 @@ size `block_size` in the input. During inference, it simply returns the input `x - `rng`: can be used to pass in a custom RNG instead of the default. Custom RNGs are only supported on the CPU. """ -function DropBlock(drop_block_prob = 0.1, block_size = 7, gamma_scale = 1.0, +function DropBlock(drop_block_prob = 0.1, block_size::Integer = 7, gamma_scale = 1.0, rng = rng_from_array()) if drop_block_prob == 0.0 return identity diff --git a/src/layers/embeddings.jl b/src/layers/embeddings.jl index 3e85f18d9..560ac074d 100644 --- a/src/layers/embeddings.jl +++ b/src/layers/embeddings.jl @@ -1,7 +1,7 @@ _flatten_spatial(x) = permutedims(reshape(x, (:, size(x, 3), size(x, 4))), (2, 1, 3)) """ - PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels = 3, + PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels::Integer = 3, patch_size::Dims{2} = (16, 16), embedplanes = 768, norm_layer = planes -> identity, flatten = true) @@ -19,8 +19,8 @@ patches. 
- `flatten`: set true to flatten the input spatial dimensions after the embedding """ function PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels::Integer = 3, - patch_size::Dims{2} = (16, 16), embedplanes = 768, - norm_layer = planes -> identity, flatten = true) + patch_size::Dims{2} = (16, 16), embedplanes::Integer = 768, + norm_layer = planes -> identity, flatten::Bool = true) im_height, im_width = imsize patch_height, patch_width = patch_size @@ -33,13 +33,15 @@ function PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels::Integer = 3, end """ - ViPosEmbedding(embedsize::Integer, npatches::Integer; init = (dims::Dims{2}) -> rand(Float32, dims)) + ViPosEmbedding(embedsize::Integer, npatches::Integer; + init = (dims::Dims{2}) -> rand(Float32, dims)) Positional embedding layer used by many vision transformer-like models. """ struct ViPosEmbedding{T} vectors::T end +@functor ViPosEmbedding function ViPosEmbedding(embedsize::Integer, npatches::Integer; init = (dims::Dims{2}) -> rand(Float32, dims)) @@ -48,22 +50,20 @@ end (p::ViPosEmbedding)(x) = x .+ p.vectors -@functor ViPosEmbedding - """ - ClassTokens(dim; init = Flux.zeros32) + ClassTokens(planes::Integer; init = Flux.zeros32) -Appends class tokens to an input with embedding dimension `dim` for use in many vision transformer models. +Appends class tokens to an input with embedding dimension `planes` for use in many +vision transformer models. """ struct ClassTokens{T} token::T end +@functor ClassTokens -ClassTokens(dim::Integer; init = Flux.zeros32) = ClassTokens(init(dim, 1, 1)) +ClassTokens(planes::Integer; init = Flux.zeros32) = ClassTokens(init(planes, 1, 1)) function (m::ClassTokens)(x::AbstractArray{T, 3}) where {T} tokens = m.token .* MLUtils.ones_like(x, T, (1, 1, size(x, 3))) return hcat(tokens, x) end - -@functor ClassTokens diff --git a/src/layers/mlp.jl b/src/layers/mlp.jl index a3bdb0fb5..3a1c27413 100644 --- a/src/layers/mlp.jl +++ b/src/layers/mlp.jl @@ -47,8 +47,9 @@ end gated_mlp_block(::typeof(identity), args...; kwargs...) = mlp_block(args...; kwargs...) """ - create_classifier(inplanes, nclasses; pool_layer = AdaptiveMeanPool((1, 1)), - dropout_rate = 0.0, use_conv = false) + create_classifier(inplanes::Integer, nclasses::Integer; + pool_layer = AdaptiveMeanPool((1, 1)), + dropout_rate = 0.0, use_conv::Bool = false) Creates a classifier head to be used for models. @@ -61,8 +62,9 @@ Creates a classifier head to be used for models. - `dropout_rate`: dropout rate used in the classifier head. - `use_conv`: whether to use a 1x1 convolutional layer instead of a `Dense` layer. """ -function create_classifier(inplanes, nclasses; pool_layer = AdaptiveMeanPool((1, 1)), - dropout_rate = 0.0, use_conv = false) +function create_classifier(inplanes::Integer, nclasses::Integer; + pool_layer = AdaptiveMeanPool((1, 1)), + dropout_rate = 0.0, use_conv::Bool = false) # Pooling if pool_layer === identity @assert use_conv diff --git a/src/layers/pool.jl b/src/layers/pool.jl index 1962ab0fb..049c06451 100644 --- a/src/layers/pool.jl +++ b/src/layers/pool.jl @@ -1,5 +1,6 @@ """ - AdaptiveMeanMaxPool(output_size = (1, 1); connection = +) + AdaptiveMeanMaxPool(connection = +, output_size::Tuple = (1, 1)) + AdaptiveMeanMaxPool(output_size::Tuple = (1, 1)) A type of adaptive pooling layer which uses both mean and max pooling and combines them to produce a single output. Note that this is equivalent to @@ -10,7 +11,7 @@ produce a single output. Note that this is equivalent to - `output_size`: The size of the output after pooling. 
- `connection`: The connection type to use. """ -function AdaptiveMeanMaxPool(connection, output_size = (1, 1)) +function AdaptiveMeanMaxPool(connection, output_size::Tuple = (1, 1)) return Parallel(connection, AdaptiveMeanPool(output_size), AdaptiveMaxPool(output_size)) end AdaptiveMeanMaxPool(output_size::Tuple = (1, 1)) = AdaptiveMeanMaxPool(+, output_size) diff --git a/src/layers/scale.jl b/src/layers/scale.jl index 965b50f38..f3a555b76 100644 --- a/src/layers/scale.jl +++ b/src/layers/scale.jl @@ -9,7 +9,7 @@ _input_scale(λ, activation, x) = activation.(λ .* x) _input_scale(λ, ::typeof(identity), x) = λ .* x """ - LayerScale(λ, planes::Integer) + LayerScale(planes::Integer, λ) Creates a `Flux.Scale` layer that performs "`LayerScale`" ([reference](https://arxiv.org/abs/2103.17239)). diff --git a/src/layers/selayers.jl b/src/layers/selayers.jl index db0f3715d..0756225ba 100644 --- a/src/layers/selayers.jl +++ b/src/layers/selayers.jl @@ -15,9 +15,9 @@ Creates a squeeze-and-excitation layer used in MobileNets and SE-Nets. - `norm_layer`: The normalization layer to be used after the convolution layers - `rd_planes`: The number of hidden feature maps in a squeeze and excite layer """ -function squeeze_excite(inplanes; reduction = 16, rd_divisor = 8, - activation = relu, gate_activation = sigmoid, - norm_layer = planes -> identity, +function squeeze_excite(inplanes::Integer; reduction::Integer = 16, + rd_divisor::Integer = 8, activation = relu, + gate_activation = sigmoid, norm_layer = planes -> identity, rd_planes = _round_channels(inplanes ÷ reduction, rd_divisor, 0)) layers = [AdaptiveMeanPool((1, 1)), Conv((1, 1), inplanes => rd_planes), @@ -40,7 +40,7 @@ Effective squeeze-and-excitation layer. - `inplanes`: The number of input feature maps - `gate_activation`: The activation function for the gate layer """ -function effective_squeeze_excite(inplanes; gate_activation = sigmoid, kwargs...) +function effective_squeeze_excite(inplanes::Integer; gate_activation = sigmoid) return SkipConnection(Chain(AdaptiveMeanPool((1, 1)), Conv((1, 1), inplanes, inplanes), gate_activation), .*) diff --git a/src/mixers/core.jl b/src/mixers/core.jl index 9f9d3b305..18f66aaa8 100644 --- a/src/mixers/core.jl +++ b/src/mixers/core.jl @@ -1,7 +1,7 @@ """ - mlpmixer(block, imsize::Dims{2} = (224, 224); inchannels = 3, norm_layer = LayerNorm, + mlpmixer(block, imsize::Dims{2} = (224, 224); inchannels::Integer = 3, norm_layer = LayerNorm, patch_size::Dims{2} = (16, 16), embedplanes = 512, drop_path_rate = 0., - depth = 12, nclasses = 1000, kwargs...) + depth = 12, nclasses::Integer = 1000, kwargs...) Creates a model with the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)). @@ -21,10 +21,9 @@ Creates a model with the MLPMixer architecture. - `kwargs`: additional arguments (if any) to pass to the mixer block. Will use the defaults if not specified. """ -function mlpmixer(block, imsize::Dims{2} = (224, 224); inchannels = 3, - norm_layer = LayerNorm, patch_size::Dims{2} = (16, 16), - embedplanes = 512, drop_path_rate = 0.0, - depth = 12, nclasses = 1000, kwargs...) +function mlpmixer(block, imsize::Dims{2} = (224, 224); norm_layer = LayerNorm, + patch_size::Dims{2} = (16, 16), embedplanes = 512, drop_path_rate = 0.0, + depth = 12, inchannels::Integer = 3, nclasses::Integer = 1000, kwargs...) 
npatches = prod(imsize .÷ patch_size) dp_rates = linear_scheduler(drop_path_rate; depth) layers = Chain(PatchEmbedding(imsize; inchannels, patch_size, embedplanes), diff --git a/src/mixers/gmlp.jl b/src/mixers/gmlp.jl index 9ebd2dce3..df4a52b70 100644 --- a/src/mixers/gmlp.jl +++ b/src/mixers/gmlp.jl @@ -42,9 +42,9 @@ function (m::SpatialGatingUnit)(x) end """ - spatial_gating_block(planes, npatches; mlp_ratio = 4.0, mlp_layer = gated_mlp_block, - norm_layer = LayerNorm, dropout_rate = 0.0, drop_path_rate = 0.0, - activation = gelu) + spatial_gating_block(planes::Integer, npatches::Integer; mlp_ratio = 4.0, + norm_layer = LayerNorm, mlp_layer = gated_mlp_block, + dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) Creates a feedforward block based on the gMLP model architecture described in the paper. ([reference](https://arxiv.org/abs/2105.08050)) @@ -60,10 +60,9 @@ Creates a feedforward block based on the gMLP model architecture described in th - `drop_path_rate`: Stochastic depth rate - `activation`: the activation function to use in the MLP blocks """ -function spatial_gating_block(planes, npatches; mlp_ratio = 4.0, norm_layer = LayerNorm, - mlp_layer = gated_mlp_block, dropout_rate = 0.0, - drop_path_rate = 0.0, - activation = gelu) +function spatial_gating_block(planes::Integer, npatches::Integer; mlp_ratio = 4.0, + norm_layer = LayerNorm, mlp_layer = gated_mlp_block, + dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) channelplanes = Int(mlp_ratio * planes) sgu = inplanes -> SpatialGatingUnit(inplanes, npatches; norm_layer) return SkipConnection(Chain(norm_layer(planes), @@ -72,14 +71,9 @@ function spatial_gating_block(planes, npatches; mlp_ratio = 4.0, norm_layer = La DropPath(drop_path_rate)), +) end -struct gMLP - layers::Any -end -@functor gMLP - """ - gMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0., nclasses = 1000) + gMLP(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the gMLP architecture. ([reference](https://arxiv.org/abs/2105.08050)). @@ -89,18 +83,23 @@ Creates a model with the gMLP architecture. - `size`: the size of the model - one of `small`, `base`, `large` or `huge` - `patch_size`: the size of the patches - `imsize`: the size of the input image - - `drop_path_rate`: Stochastic depth rate + - `inchannels`: the number of input channels - `nclasses`: number of output classes See also [`Metalhead.mlpmixer`](#). 
""" -function gMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0.0, nclasses = 1000) +struct gMLP + layers::Any +end +@functor gMLP + +function gMLP(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(size, keys(MIXER_CONFIGS)) depth = MIXER_CONFIGS[size][:depth] embedplanes = MIXER_CONFIGS[size][:planes] - layers = mlpmixer(spatial_gating_block, imsize; mlp_layer = gated_mlp_block, - patch_size, embedplanes, drop_path_rate, depth, nclasses) + layers = mlpmixer(spatial_gating_block, imsize; mlp_layer = gated_mlp_block, patch_size, + embedplanes, depth, inchannels, nclasses) return gMLP(layers) end diff --git a/src/mixers/mlpmixer.jl b/src/mixers/mlpmixer.jl index 7b6d4aa09..06aefbd48 100644 --- a/src/mixers/mlpmixer.jl +++ b/src/mixers/mlpmixer.jl @@ -1,6 +1,7 @@ """ - mixerblock(planes, npatches; mlp_ratio = (0.5, 4.0), mlp_layer = mlp_block, - dropout_rate = 0., drop_path_rate = 0., activation = gelu) + mixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, + mlp_ratio = (0.5, 4.0), dropout_rate = 0.0, drop_path_rate = 0.0, + activation = gelu) Creates a feedforward block for the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)) @@ -16,9 +17,10 @@ Creates a feedforward block for the MLPMixer architecture. - `drop_path_rate`: Stochastic depth rate - `activation`: the activation function to use in the MLP blocks """ -function mixerblock(planes, npatches; mlp_ratio = (0.5, 4.0), mlp_layer = mlp_block, - dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) - tokenplanes, channelplanes = [Int(r * planes) for r in mlp_ratio] +function mixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, + mlp_ratio::NTuple{2, Number} = (0.5, 4.0), dropout_rate = 0.0, + drop_path_rate = 0.0, activation = gelu) + tokenplanes, channelplanes = Int.(planes .* mlp_ratio) return Chain(SkipConnection(Chain(LayerNorm(planes), swapdims((2, 1, 3)), mlp_layer(npatches, tokenplanes; activation, @@ -31,14 +33,9 @@ function mixerblock(planes, npatches; mlp_ratio = (0.5, 4.0), mlp_layer = mlp_bl DropPath(drop_path_rate)), +)) end -struct MLPMixer - layers::Any -end -@functor MLPMixer - """ - MLPMixer(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0., nclasses = 1000) +MLPMixer(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)). @@ -49,17 +46,22 @@ Creates a model with the MLPMixer architecture. - `patch_size`: the size of the patches - `imsize`: the size of the input image - `drop_path_rate`: Stochastic depth rate + - `inchannels`: the number of input channels - `nclasses`: number of output classes See also [`Metalhead.mlpmixer`](#). 
""" -function MLPMixer(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0.0, nclasses = 1000) +struct MLPMixer + layers::Any +end +@functor MLPMixer + +function MLPMixer(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(size, keys(MIXER_CONFIGS)) depth = MIXER_CONFIGS[size][:depth] embedplanes = MIXER_CONFIGS[size][:planes] - layers = mlpmixer(mixerblock, imsize; patch_size, embedplanes, depth, drop_path_rate, - nclasses) + layers = mlpmixer(mixerblock, imsize; patch_size, embedplanes, depth, inchannels,nclasses) return MLPMixer(layers) end diff --git a/src/mixers/resmlp.jl b/src/mixers/resmlp.jl index 17e340310..f2c9ece15 100644 --- a/src/mixers/resmlp.jl +++ b/src/mixers/resmlp.jl @@ -1,6 +1,6 @@ """ resmixerblock(planes, npatches; dropout_rate = 0., drop_path_rate = 0., mlp_ratio = 4.0, - activation = gelu, λ = 1e-4) + activation = gelu, layerscale_init = 1e-4) Creates a block for the ResMixer architecture. ([reference](https://arxiv.org/abs/2105.03404)). @@ -15,33 +15,28 @@ Creates a block for the ResMixer architecture. - `dropout_rate`: the dropout rate to use in the MLP blocks - `drop_path_rate`: Stochastic depth rate - `activation`: the activation function to use in the MLP blocks - - `λ`: initialisation constant for the LayerScale + - `layerscale_init`: initialisation constant for the LayerScale """ -function resmixerblock(planes, npatches; mlp_ratio = 4.0, mlp_layer = mlp_block, - dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu, - λ = 1e-4) +function resmixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, + mlp_ratio = 4.0, layerscale_init = 1e-4, dropout_rate = 0.0, + drop_path_rate = 0.0, activation = gelu) return Chain(SkipConnection(Chain(Flux.Scale(planes), swapdims((2, 1, 3)), Dense(npatches, npatches), swapdims((2, 1, 3)), - LayerScale(planes, λ), + LayerScale(planes, layerscale_init), DropPath(drop_path_rate)), +), SkipConnection(Chain(Flux.Scale(planes), mlp_layer(planes, Int(mlp_ratio * planes); dropout_rate, activation), - LayerScale(planes, λ), + LayerScale(planes, layerscale_init), DropPath(drop_path_rate)), +)) end -struct ResMLP - layers::Any -end -@functor ResMLP - """ - ResMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), - drop_path_rate = 0., nclasses = 1000) + ResMLP(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the ResMLP architecture. ([reference](https://arxiv.org/abs/2105.03404)). @@ -51,18 +46,23 @@ Creates a model with the ResMLP architecture. - `size`: the size of the model - one of `small`, `base`, `large` or `huge` - `patch_size`: the size of the patches - `imsize`: the size of the input image - - `drop_path_rate`: Stochastic depth rate + - `inchannels`: the number of input channels - `nclasses`: number of output classes See also [`Metalhead.mlpmixer`](#). 
""" -function ResMLP(size::Symbol = :base; patch_size::Dims{2} = (16, 16), - imsize::Dims{2} = (224, 224), drop_path_rate = 0.0, nclasses = 1000) +struct ResMLP + layers::Any +end +@functor ResMLP + +function ResMLP(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(size, keys(MIXER_CONFIGS)) depth = MIXER_CONFIGS[size][:depth] embedplanes = MIXER_CONFIGS[size][:planes] layers = mlpmixer(resmixerblock, imsize; mlp_ratio = 4.0, patch_size, embedplanes, - drop_path_rate, depth, nclasses) + depth, inchannels, nclasses) return ResMLP(layers) end diff --git a/src/vit-based/vit.jl b/src/vit-based/vit.jl index 1fece2191..1c049e46e 100644 --- a/src/vit-based/vit.jl +++ b/src/vit-based/vit.jl @@ -12,7 +12,8 @@ Transformer as used in the base ViT architecture. - `mlp_ratio`: ratio of MLP layers to the number of input channels - `dropout_rate`: dropout rate """ -function transformer_encoder(planes, depth, nheads; mlp_ratio = 4.0, dropout_rate = 0.0) +function transformer_encoder(planes::Integer, depth::Integer, nheads::Integer; + mlp_ratio = 4.0, dropout_rate = 0.0) layers = [Chain(SkipConnection(prenorm(planes, MHAttention(planes, nheads; attn_dropout_rate = dropout_rate, @@ -26,9 +27,9 @@ function transformer_encoder(planes, depth, nheads; mlp_ratio = 4.0, dropout_rat end """ - vit(imsize::Dims{2} = (256, 256); inchannels = 3, patch_size::Dims{2} = (16, 16), + vit(imsize::Dims{2} = (256, 256); inchannels::Integer = 3, patch_size::Dims{2} = (16, 16), embedplanes = 768, depth = 6, nheads = 16, mlp_ratio = 4.0, dropout_rate = 0.1, - emb_dropout_rate = 0.1, pool = :class, nclasses = 1000) + emb_dropout_rate = 0.1, pool = :class, nclasses::Integer = 1000) Creates a Vision Transformer (ViT) model. ([reference](https://arxiv.org/abs/2010.11929)). @@ -47,9 +48,10 @@ Creates a Vision Transformer (ViT) model. - `pool`: pooling type, either :class or :mean - `nclasses`: number of classes in the output """ -function vit(imsize::Dims{2} = (256, 256); inchannels = 3, patch_size::Dims{2} = (16, 16), - embedplanes = 768, depth = 6, nheads = 16, mlp_ratio = 4.0, dropout_rate = 0.1, - emb_dropout_rate = 0.1, pool = :class, nclasses = 1000) +function vit(imsize::Dims{2} = (256, 256); inchannels::Integer = 3, + patch_size::Dims{2} = (16, 16), embedplanes::Integer = 768, + depth::Integer = 6, nheads::Integer = 16, mlp_ratio = 4.0, dropout_rate = 0.1, + emb_dropout_rate = 0.1, pool::Symbol = :class, nclasses::Integer = 1000) @assert pool in [:class, :mean] "Pool type must be either `:class` (class token) or `:mean` (mean pooling)" npatches = prod(imsize .÷ patch_size) @@ -74,8 +76,8 @@ const VIT_CONFIGS = Dict(:tiny => (depth = 12, embedplanes = 192, nheads = 3), mlp_ratio = 64 // 13)) """ - ViT(mode::Symbol = base; imsize::Dims{2} = (256, 256), inchannels = 3, - patch_size::Dims{2} = (16, 16), pool = :class, nclasses = 1000) + ViT(mode::Symbol = base; imsize::Dims{2} = (256, 256), inchannels::Integer = 3, + patch_size::Dims{2} = (16, 16), pool = :class, nclasses::Integer = 1000) Creates a Vision Transformer (ViT) model. ([reference](https://arxiv.org/abs/2010.11929)). 
@@ -97,11 +99,11 @@ struct ViT end @functor ViT -function ViT(mode::Symbol = :base; imsize::Dims{2} = (256, 256), inchannels = 3, - patch_size::Dims{2} = (16, 16), pool = :class, nclasses = 1000) +function ViT(mode::Symbol = :base; imsize::Dims{2} = (256, 256), patch_size::Dims{2} = (16, 16), + inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, keys(VIT_CONFIGS)) kwargs = VIT_CONFIGS[mode] - layers = vit(imsize; inchannels, patch_size, nclasses, pool, kwargs...) + layers = vit(imsize; inchannels, patch_size, nclasses, kwargs...) return ViT(layers) end diff --git a/test/convnets.jl b/test/convnets.jl index e62b14299..35a745b87 100644 --- a/test/convnets.jl +++ b/test/convnets.jl @@ -263,15 +263,12 @@ end end end - @testset "ConvNeXt" verbose = true begin @testset for mode in [:small, :base, :large, :tiny, :xlarge] - @testset for drop_path_rate in [0.0, 0.5] - m = ConvNeXt(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end + m = ConvNeXt(mode) + @test size(m(x_224)) == (1000, 1) + @test gradtest(m, x_224) + _gc() end end diff --git a/test/mixers.jl b/test/mixers.jl index 885ff5838..51cdd736e 100644 --- a/test/mixers.jl +++ b/test/mixers.jl @@ -1,32 +1,8 @@ -@testset "MLPMixer" begin - @testset for mode in [:small, :base, :large] #:huge] - @testset for drop_path_rate in [0.0, 0.5] - m = MLPMixer(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end - end -end - -@testset "ResMLP" begin - @testset for mode in [:small, :base, :large] #:huge] - @testset for drop_path_rate in [0.0, 0.5] - m = ResMLP(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end - end -end - -@testset "gMLP" begin - @testset for mode in [:small, :base, :large] #:huge] - @testset for drop_path_rate in [0.0, 0.5] - m = gMLP(mode; drop_path_rate) - @test size(m(x_224)) == (1000, 1) - @test gradtest(m, x_224) - _gc() - end +@testset for model in [MLPMixer, ResMLP, gMLP] + @testset for mode in [:small, :base, :large] + m = model(mode) + @test size(m(x_224)) == (1000, 1) + @test gradtest(m, x_224) + _gc() end end From 5aece44e27505d4cc8ab14efd76ad223b6a1c6ad Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Tue, 2 Aug 2022 10:07:51 +0530 Subject: [PATCH 5/8] Use `create_classifier` more --- src/convnets/alexnet.jl | 34 +++++++------- src/convnets/convmixer.jl | 3 +- src/convnets/convnext.jl | 9 ++-- src/convnets/densenet.jl | 5 +-- src/convnets/efficientnet.jl | 14 +++--- src/convnets/inception/googlenet.jl | 48 ++++++++++---------- src/convnets/inception/inceptionresnetv2.jl | 35 +++++++-------- src/convnets/inception/inceptionv3.jl | 43 +++++++++--------- src/convnets/inception/inceptionv4.jl | 49 ++++++++++----------- src/convnets/inception/xception.jl | 23 +++++----- src/convnets/mobilenet/mobilenetv1.jl | 6 +-- src/convnets/mobilenet/mobilenetv2.jl | 16 +++---- src/convnets/mobilenet/mobilenetv3.jl | 14 +++--- src/convnets/squeezenet.jl | 31 +++++++------ src/convnets/vgg.jl | 2 +- src/layers/mlp.jl | 21 +++++---- src/mixers/core.jl | 7 ++- src/mixers/mlpmixer.jl | 8 ++-- src/vit-based/vit.jl | 3 +- 19 files changed, 175 insertions(+), 196 deletions(-) diff --git a/src/convnets/alexnet.jl b/src/convnets/alexnet.jl index 75ba5ad48..6b384f80c 100644 --- a/src/convnets/alexnet.jl +++ b/src/convnets/alexnet.jl @@ -9,22 +9,21 @@ Create an AlexNet model - `nclasses`: the number of output classes """ function alexnet(; 
inchannels::Integer = 3, nclasses::Integer = 1000) - layers = Chain(Chain(Conv((11, 11), inchannels => 64, relu; stride = (4, 4), pad = (2, 2)), - MaxPool((3, 3); stride = (2, 2)), - Conv((5, 5), 64 => 192, relu; pad = (2, 2)), - MaxPool((3, 3); stride = (2, 2)), - Conv((3, 3), 192 => 384, relu; pad = (1, 1)), - Conv((3, 3), 384 => 256, relu; pad = (1, 1)), - Conv((3, 3), 256 => 256, relu; pad = (1, 1)), - MaxPool((3, 3); stride = (2, 2)), - AdaptiveMeanPool((6, 6))), - Chain(MLUtils.flatten, - Dropout(0.5), - Dense(256 * 6 * 6, 4096, relu), - Dropout(0.5), - Dense(4096, 4096, relu), - Dense(4096, nclasses))) - return layers + backbone = Chain(Conv((11, 11), inchannels => 64, relu; stride = 4, pad = 2), + MaxPool((3, 3); stride = 2), + Conv((5, 5), 64 => 192, relu; pad = 2), + MaxPool((3, 3); stride = 2), + Conv((3, 3), 192 => 384, relu; pad = 1), + Conv((3, 3), 384 => 256, relu; pad = 1), + Conv((3, 3), 256 => 256, relu; pad = 1), + MaxPool((3, 3); stride = 2)) + classifier = Chain(AdaptiveMeanPool((6, 6)), MLUtils.flatten, + Dropout(0.5), + Dense(256 * 6 * 6, 4096, relu), + Dropout(0.5), + Dense(4096, 4096, relu), + Dense(4096, nclasses)) + return Chain(backbone, classifier) end """ @@ -47,7 +46,8 @@ struct AlexNet end @functor AlexNet -function AlexNet(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) +function AlexNet(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) layers = alexnet(; inchannels, nclasses) if pretrain loadpretrain!(layers, "AlexNet") diff --git a/src/convnets/convmixer.jl b/src/convnets/convmixer.jl index c75303184..efde886cb 100644 --- a/src/convnets/convmixer.jl +++ b/src/convnets/convmixer.jl @@ -26,8 +26,7 @@ function convmixer(planes::Integer, depth::Integer; kernel_size = (9, 9), pad = SamePad())), +), conv_norm((1, 1), planes, planes, activation; preact = true)...) 
for _ in 1:depth] - head = Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, Dense(planes, nclasses)) - return Chain(Chain(stem..., Chain(blocks)), head) + return Chain(Chain(stem..., Chain(blocks)), create_classifier(planes, nclasses)) end const CONVMIXER_CONFIGS = Dict(:base => Dict(:planes => 1536, :depth => 20, diff --git a/src/convnets/convnext.jl b/src/convnets/convnext.jl index d7c39cc04..7bb265c24 100644 --- a/src/convnets/convnext.jl +++ b/src/convnets/convnext.jl @@ -63,11 +63,10 @@ function convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; cur += depths[i] end backbone = collect(Iterators.flatten(Iterators.flatten(zip(downsample_layers, stages)))) - head = Chain(GlobalMeanPool(), - MLUtils.flatten, - LayerNorm(planes[end]), - Dense(planes[end], nclasses)) - return Chain(Chain(backbone), head) + classifier = Chain(GlobalMeanPool(), MLUtils.flatten, + LayerNorm(planes[end]), + Dense(planes[end], nclasses)) + return Chain(Chain(backbone...), classifier) end # Configurations for ConvNeXt models diff --git a/src/convnets/densenet.jl b/src/convnets/densenet.jl index 0b164e2ab..9720a0212 100644 --- a/src/convnets/densenet.jl +++ b/src/convnets/densenet.jl @@ -83,10 +83,7 @@ function densenet(inplanes::Integer, growth_rates; reduction = 0.5, inchannels:: inplanes = floor(Int, outplanes * reduction) end push!(layers, BatchNorm(outplanes, relu)) - return Chain(Chain(layers), - Chain(AdaptiveMeanPool((1, 1)), - MLUtils.flatten, - Dense(outplanes, nclasses))) + return Chain(Chain(layers...), create_classifier(outplanes, nclasses)) end """ diff --git a/src/convnets/efficientnet.jl b/src/convnets/efficientnet.jl index 730840fa4..86ba9373f 100644 --- a/src/convnets/efficientnet.jl +++ b/src/convnets/efficientnet.jl @@ -28,8 +28,8 @@ function efficientnet(scalings, block_configs; max_width::Integer = 1280, scalew(w) = wscale ≈ 1 ? w : ceil(Int64, wscale * w) scaled(d) = dscale ≈ 1 ? d : ceil(Int64, dscale * d) out_channels = _round_channels(scalew(32), 8) - stem = conv_norm((3, 3), inchannels, out_channels, swish; - bias = false, stride = 2, pad = SamePad()) + stem = conv_norm((3, 3), inchannels, out_channels, swish; bias = false, stride = 2, + pad = SamePad()) blocks = [] for (n, k, s, e, i, o) in block_configs in_channels = _round_channels(scalew(i), 8) @@ -44,13 +44,11 @@ function efficientnet(scalings, block_configs; max_width::Integer = 1280, stride = 1, reduction = 4)) end end - blocks = Chain(blocks...) 
head_out_channels = _round_channels(max_width, 8) - head = conv_norm((1, 1), out_channels, head_out_channels, swish; - bias = false, pad = SamePad()) - top = Dense(head_out_channels, nclasses) - return Chain(Chain([stem..., blocks, head...]), - Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, top)) + append!(blocks, + conv_norm((1, 1), out_channels, head_out_channels, swish; + bias = false, pad = SamePad())) + return Chain(Chain(stem..., blocks...), create_classifier(head_out_channels, nclasses)) end # n: # of block repetitions diff --git a/src/convnets/inception/googlenet.jl b/src/convnets/inception/googlenet.jl index 90f92ddfc..a72ba5e6c 100644 --- a/src/convnets/inception/googlenet.jl +++ b/src/convnets/inception/googlenet.jl @@ -36,32 +36,29 @@ Create an Inception-v1 model (commonly referred to as GoogLeNet) - `nclasses`: the number of output classes """ -function googlenet(; nclasses::Integer = 1000) - layers = Chain(Chain(Conv((7, 7), 3 => 64; stride = 2, pad = 3), - MaxPool((3, 3); stride = 2, pad = 1), - Conv((1, 1), 64 => 64), - Conv((3, 3), 64 => 192; pad = 1), - MaxPool((3, 3); stride = 2, pad = 1), - _inceptionblock(192, 64, 96, 128, 16, 32, 32), - _inceptionblock(256, 128, 128, 192, 32, 96, 64), - MaxPool((3, 3); stride = 2, pad = 1), - _inceptionblock(480, 192, 96, 208, 16, 48, 64), - _inceptionblock(512, 160, 112, 224, 24, 64, 64), - _inceptionblock(512, 128, 128, 256, 24, 64, 64), - _inceptionblock(512, 112, 144, 288, 32, 64, 64), - _inceptionblock(528, 256, 160, 320, 32, 128, 128), - MaxPool((3, 3); stride = 2, pad = 1), - _inceptionblock(832, 256, 160, 320, 32, 128, 128), - _inceptionblock(832, 384, 192, 384, 48, 128, 128)), - Chain(AdaptiveMeanPool((1, 1)), - MLUtils.flatten, - Dropout(0.4), - Dense(1024, nclasses))) - return layers +function googlenet(; inchannels::Integer = 3, nclasses::Integer = 1000) + backbone = Chain(Conv((7, 7), inchannels => 64; stride = 2, pad = 3), + MaxPool((3, 3); stride = 2, pad = 1), + Conv((1, 1), 64 => 64), + Conv((3, 3), 64 => 192; pad = 1), + MaxPool((3, 3); stride = 2, pad = 1), + _inceptionblock(192, 64, 96, 128, 16, 32, 32), + _inceptionblock(256, 128, 128, 192, 32, 96, 64), + MaxPool((3, 3); stride = 2, pad = 1), + _inceptionblock(480, 192, 96, 208, 16, 48, 64), + _inceptionblock(512, 160, 112, 224, 24, 64, 64), + _inceptionblock(512, 128, 128, 256, 24, 64, 64), + _inceptionblock(512, 112, 144, 288, 32, 64, 64), + _inceptionblock(528, 256, 160, 320, 32, 128, 128), + MaxPool((3, 3); stride = 2, pad = 1), + _inceptionblock(832, 256, 160, 320, 32, 128, 128), + _inceptionblock(832, 384, 192, 384, 48, 128, 128)) + classifier = create_classifier(1024, nclasses; dropout_rate = 0.4) + return Chain(backbone, classifier) end """ - GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000) + GoogLeNet(; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Create an Inception-v1 model (commonly referred to as `GoogLeNet`) ([reference](https://arxiv.org/abs/1409.4842v1)). 
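A usage sketch of the newly exposed keywords (values are illustrative); a single-channel
input shows why `inchannels` is now part of the public API:

    model = GoogLeNet(; inchannels = 1, nclasses = 10)
    size(model(rand(Float32, 224, 224, 1, 1)))   # expected (10, 1)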
@@ -82,8 +79,9 @@ struct GoogLeNet end @functor GoogLeNet -function GoogLeNet(; pretrain::Bool = false, nclasses::Integer = 1000) - layers = googlenet(; nclasses = nclasses) +function GoogLeNet(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) + layers = googlenet(; inchannels, nclasses) if pretrain loadpretrain!(layers, "GoogLeNet") end diff --git a/src/convnets/inception/inceptionresnetv2.jl b/src/convnets/inception/inceptionresnetv2.jl index 747da2fb2..96b391b65 100644 --- a/src/convnets/inception/inceptionresnetv2.jl +++ b/src/convnets/inception/inceptionresnetv2.jl @@ -77,24 +77,23 @@ Creates an InceptionResNetv2 model. """ function inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) - body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., - conv_norm((3, 3), 32, 32)..., - conv_norm((3, 3), 32, 64; pad = 1)..., - MaxPool((3, 3); stride = 2), - conv_norm((3, 3), 64, 80)..., - conv_norm((3, 3), 80, 192)..., - MaxPool((3, 3); stride = 2), - mixed_5b(), - [block35(0.17f0) for _ in 1:10]..., - mixed_6a(), - [block17(0.10f0) for _ in 1:20]..., - mixed_7a(), - [block8(0.20f0) for _ in 1:9]..., - block8(; activation = relu), - conv_norm((1, 1), 2080, 1536)...) - head = Chain(GlobalMeanPool(), MLUtils.flatten, Dropout(dropout_rate), - Dense(1536, nclasses)) - return Chain(body, head) + backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., + conv_norm((3, 3), 32, 32)..., + conv_norm((3, 3), 32, 64; pad = 1)..., + MaxPool((3, 3); stride = 2), + conv_norm((3, 3), 64, 80)..., + conv_norm((3, 3), 80, 192)..., + MaxPool((3, 3); stride = 2), + mixed_5b(), + [block35(0.17f0) for _ in 1:10]..., + mixed_6a(), + [block17(0.10f0) for _ in 1:20]..., + mixed_7a(), + [block8(0.20f0) for _ in 1:9]..., + block8(; activation = relu), + conv_norm((1, 1), 2080, 1536)...) + classifier = create_classifier(1536, nclasses; dropout_rate) + return Chain(backbone, classifier) end """ diff --git a/src/convnets/inception/inceptionv3.jl b/src/convnets/inception/inceptionv3.jl index 8d9977d80..8a5e19849 100644 --- a/src/convnets/inception/inceptionv3.jl +++ b/src/convnets/inception/inceptionv3.jl @@ -136,29 +136,26 @@ Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). 
- `nclasses`: the number of output classes """ function inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000) - layer = Chain(Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., - conv_norm((3, 3), 32, 32)..., - conv_norm((3, 3), 32, 64; pad = 1)..., - MaxPool((3, 3); stride = 2), - conv_norm((1, 1), 64, 80)..., - conv_norm((3, 3), 80, 192)..., - MaxPool((3, 3); stride = 2), - inceptionv3_a(192, 32), - inceptionv3_a(256, 64), - inceptionv3_a(288, 64), - inceptionv3_b(288), - inceptionv3_c(768, 128), - inceptionv3_c(768, 160), - inceptionv3_c(768, 160), - inceptionv3_c(768, 192), - inceptionv3_d(768), - inceptionv3_e(1280), - inceptionv3_e(2048)), - Chain(AdaptiveMeanPool((1, 1)), - Dropout(0.2), - MLUtils.flatten, - Dense(2048, nclasses))) - return layer + backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., + conv_norm((3, 3), 32, 32)..., + conv_norm((3, 3), 32, 64; pad = 1)..., + MaxPool((3, 3); stride = 2), + conv_norm((1, 1), 64, 80)..., + conv_norm((3, 3), 80, 192)..., + MaxPool((3, 3); stride = 2), + inceptionv3_a(192, 32), + inceptionv3_a(256, 64), + inceptionv3_a(288, 64), + inceptionv3_b(288), + inceptionv3_c(768, 128), + inceptionv3_c(768, 160), + inceptionv3_c(768, 160), + inceptionv3_c(768, 192), + inceptionv3_d(768), + inceptionv3_e(1280), + inceptionv3_e(2048)) + classifier = create_classifier(2048, nclasses; dropout_rate = 0.2) + return Chain(backbone, classifier) end """ diff --git a/src/convnets/inception/inceptionv4.jl b/src/convnets/inception/inceptionv4.jl index b84232fb8..8d4f00eb2 100644 --- a/src/convnets/inception/inceptionv4.jl +++ b/src/convnets/inception/inceptionv4.jl @@ -95,31 +95,30 @@ Create an Inceptionv4 model. """ function inceptionv4(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) - body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., - conv_norm((3, 3), 32, 32)..., - conv_norm((3, 3), 32, 64; pad = 1)..., - mixed_3a(), - mixed_4a(), - mixed_5a(), - inceptionv4_a(), - inceptionv4_a(), - inceptionv4_a(), - inceptionv4_a(), - reduction_a(), # mixed_6a - inceptionv4_b(), - inceptionv4_b(), - inceptionv4_b(), - inceptionv4_b(), - inceptionv4_b(), - inceptionv4_b(), - inceptionv4_b(), - reduction_b(), # mixed_7a - inceptionv4_c(), - inceptionv4_c(), - inceptionv4_c()) - head = Chain(GlobalMeanPool(), MLUtils.flatten, Dropout(dropout_rate), - Dense(1536, nclasses)) - return Chain(body, head) + backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2)..., + conv_norm((3, 3), 32, 32)..., + conv_norm((3, 3), 32, 64; pad = 1)..., + mixed_3a(), + mixed_4a(), + mixed_5a(), + inceptionv4_a(), + inceptionv4_a(), + inceptionv4_a(), + inceptionv4_a(), + reduction_a(), # mixed_6a + inceptionv4_b(), + inceptionv4_b(), + inceptionv4_b(), + inceptionv4_b(), + inceptionv4_b(), + inceptionv4_b(), + inceptionv4_b(), + reduction_b(), # mixed_7a + inceptionv4_c(), + inceptionv4_c(), + inceptionv4_c()) + classifier = create_classifier(1536, nclasses; dropout_rate) + return Chain(backbone, classifier) end """ diff --git a/src/convnets/inception/xception.jl b/src/convnets/inception/xception.jl index 8d2ad13d8..71a4efc15 100644 --- a/src/convnets/inception/xception.jl +++ b/src/convnets/inception/xception.jl @@ -57,18 +57,17 @@ Creates an Xception model. - `nclasses`: the number of output classes. 
""" function xception(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) - body = Chain(conv_norm((3, 3), inchannels, 32; stride = 2, bias = false)..., - conv_norm((3, 3), 32, 64; bias = false)..., - xception_block(64, 128, 2; stride = 2, start_with_relu = false), - xception_block(128, 256, 2; stride = 2), - xception_block(256, 728, 2; stride = 2), - [xception_block(728, 728, 3) for _ in 1:8]..., - xception_block(728, 1024, 2; stride = 2, grow_at_start = false), - depthwise_sep_conv_norm((3, 3), 1024, 1536; pad = 1)..., - depthwise_sep_conv_norm((3, 3), 1536, 2048; pad = 1)...) - head = Chain(GlobalMeanPool(), MLUtils.flatten, Dropout(dropout_rate), - Dense(2048, nclasses)) - return Chain(body, head) + backbone = Chain(conv_norm((3, 3), inchannels, 32; stride = 2, bias = false)..., + conv_norm((3, 3), 32, 64; bias = false)..., + xception_block(64, 128, 2; stride = 2, start_with_relu = false), + xception_block(128, 256, 2; stride = 2), + xception_block(256, 728, 2; stride = 2), + [xception_block(728, 728, 3) for _ in 1:8]..., + xception_block(728, 1024, 2; stride = 2, grow_at_start = false), + depthwise_sep_conv_norm((3, 3), 1024, 1536; pad = 1)..., + depthwise_sep_conv_norm((3, 3), 1536, 2048; pad = 1)...) + classifier = create_classifier(2048, nclasses; dropout_rate) + return Chain(backbone, classifier) end """ diff --git a/src/convnets/mobilenet/mobilenetv1.jl b/src/convnets/mobilenet/mobilenetv1.jl index e31f8835b..ca20b4a64 100644 --- a/src/convnets/mobilenet/mobilenetv1.jl +++ b/src/convnets/mobilenet/mobilenetv1.jl @@ -34,11 +34,7 @@ function mobilenetv1(width_mult::Number, config::Vector{<:Tuple}; activation = r inchannels = outch end end - - return Chain(Chain(layers), - Chain(GlobalMeanPool(), - MLUtils.flatten, - Dense(inchannels, nclasses))) + return Chain(Chain(layers...), create_classifier(inchannels, nclasses)) end # Layer configurations for MobileNetv1 diff --git a/src/convnets/mobilenet/mobilenetv2.jl b/src/convnets/mobilenet/mobilenetv2.jl index 9dd35e9f9..59e147829 100644 --- a/src/convnets/mobilenet/mobilenetv2.jl +++ b/src/convnets/mobilenet/mobilenetv2.jl @@ -25,13 +25,14 @@ Create a MobileNetv2 model. function mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; max_width::Integer = 1280, inchannels::Integer = 3, nclasses::Integer = 1000) + divisor = width_mult == 0.1 ? 4 : 8 # building first layer - inplanes = _round_channels(32 * width_mult, width_mult == 0.1 ? 4 : 8) + inplanes = _round_channels(32 * width_mult, divisor) layers = [] append!(layers, conv_norm((3, 3), inchannels, inplanes; pad = 1, stride = 2)) # building inverted residual blocks for (t, c, n, s, a) in configs - outplanes = _round_channels(c * width_mult, width_mult == 0.1 ? 4 : 8) + outplanes = _round_channels(c * width_mult, divisor) for i in 1:n push!(layers, invertedresidual((3, 3), inplanes, outplanes, a; expansion = t, @@ -39,14 +40,11 @@ function mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; inplanes = outplanes end end - # building last several layers - outplanes = (width_mult > 1) ? - _round_channels(max_width * width_mult, width_mult == 0.1 ? 4 : 8) : + # building last layers + outplanes = width_mult > 1 ? 
_round_channels(max_width * width_mult, divisor) : max_width - return Chain(Chain(Chain(layers), - conv_norm((1, 1), inplanes, outplanes, relu6; bias = false)...), - Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, - Dense(outplanes, nclasses))) + append!(layers, conv_norm((1, 1), inplanes, outplanes, relu6; bias = false)) + return Chain(Chain(layers...), create_classifier(outplanes, nclasses)) end # Layer configurations for MobileNetv2 diff --git a/src/convnets/mobilenet/mobilenetv3.jl b/src/convnets/mobilenet/mobilenetv3.jl index 00c0e0139..1c5e5825b 100644 --- a/src/convnets/mobilenet/mobilenetv3.jl +++ b/src/convnets/mobilenet/mobilenetv3.jl @@ -44,16 +44,16 @@ function mobilenetv3(width_mult::Number, configs::Vector{<:Tuple}; stride = s, reduction = r)) inplanes = outplanes end - # building last several layers + # building last layers output_channel = max_width output_channel = width_mult > 1.0 ? _round_channels(output_channel * width_mult, 8) : output_channel - classifier = Chain(Dense(explanes, output_channel, hardswish), + append!(layers, conv_norm((1, 1), inplanes, explanes, hardswish; bias = false)) + classifier = Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, + Dense(explanes, output_channel, hardswish), Dropout(0.2), Dense(output_channel, nclasses)) - return Chain(Chain(Chain(layers), - conv_norm((1, 1), inplanes, explanes, hardswish; bias = false)...), - Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, classifier)) + return Chain(Chain(layers...), classifier) end # Layer configurations for small and large models for MobileNetv3 @@ -91,7 +91,7 @@ const MOBILENETV3_CONFIGS = Dict(:small => [ ]) """ - MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain::Bool = false, + MobileNetv3(mode::Symbol; width_mult::Number = 1, pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Create a MobileNetv3 model with the specified configuration. @@ -115,7 +115,7 @@ struct MobileNetv3 end @functor MobileNetv3 -function MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain::Bool = false, +function MobileNetv3(mode::Symbol; width_mult::Number = 1, pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, [:small, :large]) max_width = (mode == :large) ? 1280 : 1024 diff --git a/src/convnets/squeezenet.jl b/src/convnets/squeezenet.jl index 3ee6653bc..b3cfb0293 100644 --- a/src/convnets/squeezenet.jl +++ b/src/convnets/squeezenet.jl @@ -32,22 +32,21 @@ Create a SqueezeNet - `nclasses`: the number of output classes. 
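
A usage sketch (an editor's illustration, not part of the original patch; it assumes the
keyword defaults listed above):

    using Metalhead
    model = Metalhead.squeezenet(; nclasses = 10)
    x = rand(Float32, 224, 224, 3, 1)  # one 224×224 RGB image in WHCN order
    size(model(x))                     # (10, 1): the 1×1 conv head is mean-pooled and flattened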
""" function squeezenet(; inchannels::Integer = 3, nclasses::Integer = 1000) - return Chain(Chain(Conv((3, 3), inchannels => 64, relu; stride = 2), - MaxPool((3, 3); stride = 2), - fire(64, 16, 64, 64), - fire(128, 16, 64, 64), - MaxPool((3, 3); stride = 2), - fire(128, 32, 128, 128), - fire(256, 32, 128, 128), - MaxPool((3, 3); stride = 2), - fire(256, 48, 192, 192), - fire(384, 48, 192, 192), - fire(384, 64, 256, 256), - fire(512, 64, 256, 256), - Dropout(0.5), - Conv((1, 1), 512 => nclasses, relu)), - AdaptiveMeanPool((1, 1)), - MLUtils.flatten) + backbone = Chain(Conv((3, 3), inchannels => 64, relu; stride = 2), + MaxPool((3, 3); stride = 2), + fire(64, 16, 64, 64), + fire(128, 16, 64, 64), + MaxPool((3, 3); stride = 2), + fire(128, 32, 128, 128), + fire(256, 32, 128, 128), + MaxPool((3, 3); stride = 2), + fire(256, 48, 192, 192), + fire(384, 48, 192, 192), + fire(384, 64, 256, 256), + fire(512, 64, 256, 256)) + classifier = Chain(Dropout(0.5), Conv((1, 1), 512 => nclasses, relu), + AdaptiveMeanPool((1, 1)), MLUtils.flatten) + return Chain(backbone, classifier) end """ diff --git a/src/convnets/vgg.jl b/src/convnets/vgg.jl index 0b6026eb8..e685620a3 100644 --- a/src/convnets/vgg.jl +++ b/src/convnets/vgg.jl @@ -99,7 +99,7 @@ function vgg(imsize::Dims{2}; config, batchnorm::Bool = false, fcsize::Integer = conv = vgg_convolutional_layers(config, batchnorm, inchannels) imsize = outputsize(conv, (imsize..., inchannels); padbatch = true)[1:3] class = vgg_classifier_layers(imsize, nclasses, fcsize, dropout_rate) - return Chain(Chain(conv), class) + return Chain(Chain(conv...), class) end const VGG_CONV_CONFIGS = Dict(:A => [(64, 1), (128, 1), (256, 2), (512, 2), (512, 2)], diff --git a/src/layers/mlp.jl b/src/layers/mlp.jl index 3a1c27413..9b8c48de2 100644 --- a/src/layers/mlp.jl +++ b/src/layers/mlp.jl @@ -47,7 +47,7 @@ end gated_mlp_block(::typeof(identity), args...; kwargs...) = mlp_block(args...; kwargs...) """ - create_classifier(inplanes::Integer, nclasses::Integer; + create_classifier(inplanes::Integer, nclasses::Integer, activation = relu; pool_layer = AdaptiveMeanPool((1, 1)), dropout_rate = 0.0, use_conv::Bool = false) @@ -57,26 +57,25 @@ Creates a classifier head to be used for models. - `inplanes`: number of input feature maps - `nclasses`: number of output classes + - `activation`: activation function to use - `pool_layer`: pooling layer to use. This is passed in with the layer instantiated with any arguments that are needed i.e. as `AdaptiveMeanPool((1, 1))`, for example. - `dropout_rate`: dropout rate used in the classifier head. - `use_conv`: whether to use a 1x1 convolutional layer instead of a `Dense` layer. """ -function create_classifier(inplanes::Integer, nclasses::Integer; - pool_layer = AdaptiveMeanPool((1, 1)), - dropout_rate = 0.0, use_conv::Bool = false) +function create_classifier(inplanes::Integer, nclasses::Integer, activation = identity; + use_conv::Bool = falsepool_layer = AdaptiveMeanPool((1, 1)), + dropout_rate = nothing) # Pooling - if pool_layer === identity - @assert use_conv - "Pooling can only be disabled if classifier is also removed or a convolution-based classifier is used" - end flatten_in_pool = !use_conv && pool_layer !== identity if use_conv @assert pool_layer === identity "`pool_layer` must be identity if `use_conv` is true" end - global_pool = flatten_in_pool ? Chain(pool_layer, MLUtils.flatten) : pool_layer + global_pool = flatten_in_pool ? [pool_layer, MLUtils.flatten] : [pool_layer] # Fully-connected layer - fc = use_conv ? 
Conv((1, 1), inplanes => nclasses) : Dense(inplanes => nclasses) - return Chain(global_pool, Dropout(dropout_rate), fc) + fc = use_conv ? Conv((1, 1), inplanes => nclasses, activation) : + Dense(inplanes => nclasses, activation) + drop = isnothing(dropout_rate) ? [] : [Dropout(dropout_rate)] + return Chain(global_pool..., drop..., fc) end diff --git a/src/mixers/core.jl b/src/mixers/core.jl index 18f66aaa8..f08a5f5d5 100644 --- a/src/mixers/core.jl +++ b/src/mixers/core.jl @@ -29,10 +29,9 @@ function mlpmixer(block, imsize::Dims{2} = (224, 224); norm_layer = LayerNorm, layers = Chain(PatchEmbedding(imsize; inchannels, patch_size, embedplanes), Chain([block(embedplanes, npatches; drop_path_rate = dp_rates[i], kwargs...) - for i in 1:depth])) - classification_head = Chain(norm_layer(embedplanes), seconddimmean, - Dense(embedplanes, nclasses)) - return Chain(layers, classification_head) + for i in 1:depth]...)) + classifier = Chain(norm_layer(embedplanes), seconddimmean, Dense(embedplanes, nclasses)) + return Chain(layers, classifier) end # Configurations for MLPMixer models diff --git a/src/mixers/mlpmixer.jl b/src/mixers/mlpmixer.jl index 06aefbd48..336a29a33 100644 --- a/src/mixers/mlpmixer.jl +++ b/src/mixers/mlpmixer.jl @@ -35,7 +35,7 @@ end """ MLPMixer(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), - inchannels::Integer = 3, nclasses::Integer = 1000) +inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)). @@ -56,12 +56,14 @@ struct MLPMixer end @functor MLPMixer -function MLPMixer(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), +function MLPMixer(size::Symbol; imsize::Dims{2} = (224, 224), + patch_size::Dims{2} = (16, 16), inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(size, keys(MIXER_CONFIGS)) depth = MIXER_CONFIGS[size][:depth] embedplanes = MIXER_CONFIGS[size][:planes] - layers = mlpmixer(mixerblock, imsize; patch_size, embedplanes, depth, inchannels,nclasses) + layers = mlpmixer(mixerblock, imsize; patch_size, embedplanes, depth, inchannels, + nclasses) return MLPMixer(layers) end diff --git a/src/vit-based/vit.jl b/src/vit-based/vit.jl index 1c049e46e..03e520076 100644 --- a/src/vit-based/vit.jl +++ b/src/vit-based/vit.jl @@ -99,7 +99,8 @@ struct ViT end @functor ViT -function ViT(mode::Symbol = :base; imsize::Dims{2} = (256, 256), patch_size::Dims{2} = (16, 16), +function ViT(mode::Symbol = :base; imsize::Dims{2} = (256, 256), + patch_size::Dims{2} = (16, 16), inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(mode, keys(VIT_CONFIGS)) kwargs = VIT_CONFIGS[mode] From d90a6ae7b94dcc255fd64c7af1aa0b99bd0c7827 Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Tue, 2 Aug 2022 10:43:13 +0530 Subject: [PATCH 6/8] Unify higher level DenseNet API --- .github/workflows/CI.yml | 3 ++- src/convnets/densenet.jl | 48 ++++++++++++---------------------------- test/vits.jl | 2 +- 3 files changed, 17 insertions(+), 36 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c13f1c2d6..a1bf822b9 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -34,7 +34,8 @@ jobs: - '"Inception"' - '"DenseNet"' - '["ConvNeXt", "ConvMixer"]' - - '[r"ViTs", r"Mixers"]' + - 'r"Mixers"' + - 'r"ViTs"' steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/src/convnets/densenet.jl b/src/convnets/densenet.jl index 
9720a0212..b82f138fb 100644 --- a/src/convnets/densenet.jl +++ b/src/convnets/densenet.jl @@ -105,44 +105,13 @@ function densenet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reducti reduction, inchannels, nclasses) end -""" - DenseNet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, - inchannels = 3, nclasses::Integer = 1000) - -Create a DenseNet model -([reference](https://arxiv.org/abs/1608.06993)). -See also [`densenet`](#). - -# Arguments - - - `nblocks`: number of dense blocks between transitions - - `growth_rate`: the output feature map growth rate of dense blocks (i.e. `k` in the paper) - - `reduction`: the factor by which the number of feature maps is scaled across each transition - - `nclasses`: the number of output classes -""" -struct DenseNet - layers::Any -end -@functor DenseNet - -function DenseNet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, - inchannels = 3, nclasses::Integer = 1000) - layers = densenet(nblocks; growth_rate, reduction, inchannels, nclasses) - return DenseNet(layers) -end - -(m::DenseNet)(x) = m.layers(x) - -backbone(m::DenseNet) = m.layers[1] -classifier(m::DenseNet) = m.layers[2] - const DENSENET_CONFIGS = Dict(121 => [6, 12, 24, 16], 161 => [6, 12, 36, 24], 169 => [6, 12, 32, 32], 201 => [6, 12, 48, 32]) """ - DenseNet(config::Integer = 121; pretrain::Bool = false, nclasses::Integer = 1000) + DenseNet(config::Integer; pretrain::Bool = false, nclasses::Integer = 1000) DenseNet(transition_configs::NTuple{N,Integer}) Create a DenseNet model with specified configuration. Currently supported values are (121, 161, 169, 201) @@ -155,11 +124,22 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet. See also [`Metalhead.densenet`](#). """ -function DenseNet(config::Integer = 121; pretrain::Bool = false, nclasses::Integer = 1000) +struct DenseNet + layers::Any +end +@functor DenseNet + +function DenseNet(config::Integer; pretrain::Bool = false, growth_rate::Integer = 32, + reduction = 0.5, inchannels::Integer = 3, nclasses::Integer = 1000) _checkconfig(config, keys(DENSENET_CONFIGS)) - model = DenseNet(DENSENET_CONFIGS[config]; nclasses = nclasses) + model = densenet(DENSENET_CONFIGS[config]; growth_rate, reduction, inchannels, nclasses) if pretrain loadpretrain!(model, string("DenseNet", config)) end return model end + +(m::DenseNet)(x) = m.layers(x) + +backbone(m::DenseNet) = m.layers[1] +classifier(m::DenseNet) = m.layers[2] diff --git a/test/vits.jl b/test/vits.jl index 13733ddec..fb9fd6b02 100644 --- a/test/vits.jl +++ b/test/vits.jl @@ -1,5 +1,5 @@ @testset "ViT" begin - for mode in [:tiny, :small, :base, :large, :huge] #:giant, #:gigantic + for mode in [:tiny, :small, :base, :large, :huge] # :giant, :gigantic] m = ViT(mode) @test size(m(x_256)) == (1000, 1) @test gradtest(m, x_256) From 8ce0dce82e301112f82af4759c46a7975dd5fd57 Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Tue, 2 Aug 2022 19:41:31 +0530 Subject: [PATCH 7/8] Add a bunch of Compat entries --- Project.toml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 6230fdbee..691003944 100644 --- a/Project.toml +++ b/Project.toml @@ -20,9 +20,13 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] BSON = "0.3.2" Flux = "0.13" -Functors = "0.2" -MLUtils = "0.2.6" -NNlib = "0.7.34, 0.8" +Functors = "0.2, 0.3" +CUDA = "3" +ChainRulesCore = "1" +PartialFunctions = "1" +MLUtils = "0.2.10" +NNlib = 
"0.8" +NNlibCUDA = "0.2" julia = "1.6" [publish] From 59e1ef40a6e30c78ee044f5f505bff603318d697 Mon Sep 17 00:00:00 2001 From: Abhirath Anand <74202102+theabhirath@users.noreply.github.com> Date: Wed, 3 Aug 2022 09:13:07 +0530 Subject: [PATCH 8/8] More uniformity + cleanup --- src/Metalhead.jl | 8 ++- src/convnets/alexnet.jl | 19 ++++--- src/convnets/convmixer.jl | 35 ++++++------- src/convnets/convnext.jl | 30 +++++------ src/convnets/densenet.jl | 3 +- src/convnets/efficientnet.jl | 57 ++++++--------------- src/convnets/inception/googlenet.jl | 3 +- src/convnets/inception/inceptionresnetv2.jl | 3 +- src/convnets/inception/inceptionv3.jl | 3 +- src/convnets/inception/inceptionv4.jl | 3 +- src/convnets/inception/xception.jl | 4 +- src/convnets/mobilenet/mobilenetv1.jl | 8 +-- src/convnets/mobilenet/mobilenetv2.jl | 7 ++- src/convnets/mobilenet/mobilenetv3.jl | 18 +++---- src/convnets/resnets/core.jl | 31 ++++++----- src/convnets/vgg.jl | 9 ++-- src/layers/attention.jl | 3 +- src/layers/conv.jl | 36 ++++++++----- src/layers/drop.jl | 34 ++++++------ src/layers/embeddings.jl | 2 - src/layers/pool.jl | 5 +- src/mixers/core.jl | 13 ++--- src/mixers/gmlp.jl | 14 +++-- src/mixers/mlpmixer.jl | 15 +++--- src/mixers/resmlp.jl | 20 ++++---- src/utilities.jl | 2 +- src/vit-based/vit.jl | 10 ++-- test/convnets.jl | 30 +++++------ test/mixers.jl | 4 +- test/vits.jl | 4 +- 30 files changed, 202 insertions(+), 231 deletions(-) diff --git a/src/Metalhead.jl b/src/Metalhead.jl index 374f28615..78073c154 100644 --- a/src/Metalhead.jl +++ b/src/Metalhead.jl @@ -56,14 +56,12 @@ include("vit-based/vit.jl") include("pretrain.jl") export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19, - ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNeXt, + ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, + WideResNet, ResNeXt, SEResNet, SEResNeXt, DenseNet, DenseNet121, DenseNet161, DenseNet169, DenseNet201, GoogLeNet, Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2, Xception, SqueezeNet, MobileNetv1, MobileNetv2, MobileNetv3, EfficientNet, - WideResNet, SEResNet, SEResNeXt, - MLPMixer, ResMLP, gMLP, - ViT, - ConvMixer, ConvNeXt + MLPMixer, ResMLP, gMLP, ViT, ConvMixer, ConvNeXt # use Flux._big_show to pretty print large models for T in (:AlexNet, :VGG, :ResNet, :ResNeXt, :DenseNet, :SEResNet, :SEResNeXt, diff --git a/src/convnets/alexnet.jl b/src/convnets/alexnet.jl index 6b384f80c..3c713839e 100644 --- a/src/convnets/alexnet.jl +++ b/src/convnets/alexnet.jl @@ -1,11 +1,12 @@ """ - alexnet(; nclasses::Integer = 1000) + alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000) Create an AlexNet model ([reference](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)). # Arguments + - `inchannels`: The number of input channels. - `nclasses`: the number of output classes """ function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000) @@ -27,19 +28,23 @@ function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000) end """ - AlexNet(; pretrain::Bool = false, nclasses::Integer = 1000) + AlexNet(; pretrain::Bool = false, inchannels::Integer = 3, + nclasses::Integer = 1000) Create a `AlexNet`. -See also [`alexnet`](#). - -!!! warning - - `AlexNet` does not currently support pretrained weights. +([reference](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)). # Arguments - `pretrain`: set to `true` to load pre-trained weights for ImageNet + - `inchannels`: The number of input channels. 
- `nclasses`: the number of output classes + +!!! warning + + `AlexNet` does not currently support pretrained weights. + +See also [`alexnet`](#). """ struct AlexNet layers::Any diff --git a/src/convnets/convmixer.jl b/src/convnets/convmixer.jl index efde886cb..309989d2d 100644 --- a/src/convnets/convmixer.jl +++ b/src/convnets/convmixer.jl @@ -26,28 +26,28 @@ function convmixer(planes::Integer, depth::Integer; kernel_size = (9, 9), pad = SamePad())), +), conv_norm((1, 1), planes, planes, activation; preact = true)...) for _ in 1:depth] - return Chain(Chain(stem..., Chain(blocks)), create_classifier(planes, nclasses)) + return Chain(Chain(stem..., Chain(blocks...)), create_classifier(planes, nclasses)) end -const CONVMIXER_CONFIGS = Dict(:base => Dict(:planes => 1536, :depth => 20, - :kernel_size => (9, 9), - :patch_size => (7, 7)), - :small => Dict(:planes => 768, :depth => 32, - :kernel_size => (7, 7), - :patch_size => (7, 7)), - :large => Dict(:planes => 1024, :depth => 20, - :kernel_size => (9, 9), - :patch_size => (7, 7))) +const CONVMIXER_CONFIGS = Dict(:base => ((1536, 20), + (kernel_size = (9, 9), + patch_size = (7, 7))), + :small => ((768, 32), + (kernel_size = (7, 7), + patch_size = (7, 7))), + :large => ((1024, 20), + (kernel_size = (9, 9), + patch_size = (7, 7)))) """ - ConvMixer(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) + ConvMixer(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvMixer model. ([reference](https://arxiv.org/abs/2201.09792)) # Arguments - - `mode`: the mode of the model, either `:base`, `:small` or `:large` + - `config`: the size of the model, either `:base`, `:small` or `:large` - `inchannels`: The number of channels in the input. - `nclasses`: number of classes in the output """ @@ -56,13 +56,10 @@ struct ConvMixer end @functor ConvMixer -function ConvMixer(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) - _checkconfig(mode, keys(CONVMIXER_CONFIGS)) - planes = CONVMIXER_CONFIGS[mode][:planes] - depth = CONVMIXER_CONFIGS[mode][:depth] - kernel_size = CONVMIXER_CONFIGS[mode][:kernel_size] - patch_size = CONVMIXER_CONFIGS[mode][:patch_size] - layers = convmixer(planes, depth; inchannels, kernel_size, patch_size, nclasses) +function ConvMixer(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) + _checkconfig(config, keys(CONVMIXER_CONFIGS)) + layers = convmixer(CONVMIXER_CONFIGS[config][1]...; CONVMIXER_CONFIGS[config][2]..., + inchannels, nclasses) return ConvMixer(layers) end diff --git a/src/convnets/convnext.jl b/src/convnets/convnext.jl index 7bb265c24..040a409ab 100644 --- a/src/convnets/convnext.jl +++ b/src/convnets/convnext.jl @@ -22,7 +22,7 @@ function convnextblock(planes::Integer, drop_path_rate = 0.0, layerscale_init = end """ - convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; + convnext(depths::AbstractVector{<:Integer}, planes::AbstractVector{<:Integer}; drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3, nclasses::Integer = 1000) @@ -31,27 +31,27 @@ Creates the layers for a ConvNeXt model. # Arguments - - `inchannels`: number of input channels. - `depths`: list with configuration for depth of each block - `planes`: list with configuration for number of output channels in each block - `drop_path_rate`: Stochastic depth rate. - `layerscale_init`: Initial value for [`LayerScale`](#) ([reference](https://arxiv.org/abs/2103.17239)) + - `inchannels`: number of input channels. 
- `nclasses`: number of output classes """ -function convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; +function convnext(depths::AbstractVector{<:Integer}, planes::AbstractVector{<:Integer}; drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3, nclasses::Integer = 1000) @assert length(depths) == length(planes) "`planes` should have exactly one value for each block" downsample_layers = [] - stem = Chain(Conv((4, 4), inchannels => planes[1]; stride = 4), - ChannelLayerNorm(planes[1])) - push!(downsample_layers, stem) + push!(downsample_layers, + Chain(conv_norm((4, 4), inchannels => planes[1]; stride = 4, + norm_layer = ChannelLayerNorm)...)) for m in 1:(length(depths) - 1) - downsample_layer = Chain(ChannelLayerNorm(planes[m]), - Conv((2, 2), planes[m] => planes[m + 1]; stride = 2)) - push!(downsample_layers, downsample_layer) + push!(downsample_layers, + Chain(conv_norm((2, 2), planes[m] => planes[m + 1]; stride = 2, + norm_layer = ChannelLayerNorm, revnorm = true)...)) end stages = [] dp_rates = linear_scheduler(drop_path_rate; depth = sum(depths)) @@ -64,8 +64,7 @@ function convnext(depths::Vector{<:Integer}, planes::Vector{<:Integer}; end backbone = collect(Iterators.flatten(Iterators.flatten(zip(downsample_layers, stages)))) classifier = Chain(GlobalMeanPool(), MLUtils.flatten, - LayerNorm(planes[end]), - Dense(planes[end], nclasses)) + LayerNorm(planes[end]), Dense(planes[end], nclasses)) return Chain(Chain(backbone...), classifier) end @@ -77,13 +76,14 @@ const CONVNEXT_CONFIGS = Dict(:tiny => ([3, 3, 9, 3], [96, 192, 384, 768]), :xlarge => ([3, 3, 27, 3], [256, 512, 1024, 2048])) """ - ConvNeXt(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) + ConvNeXt(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) Creates a ConvNeXt model. ([reference](https://arxiv.org/abs/2201.03545)) # Arguments + - `config`: The size of the model, one of `tiny`, `small`, `base`, `large` or `xlarge`. - `inchannels`: The number of channels in the input. - `nclasses`: number of output classes @@ -94,9 +94,9 @@ struct ConvNeXt end @functor ConvNeXt -function ConvNeXt(mode::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) - _checkconfig(mode, keys(CONVNEXT_CONFIGS)) - layers = convnext(CONVNEXT_CONFIGS[mode]...; inchannels, nclasses) +function ConvNeXt(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000) + _checkconfig(config, keys(CONVNEXT_CONFIGS)) + layers = convnext(CONVNEXT_CONFIGS[config]...; inchannels, nclasses) return ConvNeXt(layers) end diff --git a/src/convnets/densenet.jl b/src/convnets/densenet.jl index b82f138fb..ab833bd41 100644 --- a/src/convnets/densenet.jl +++ b/src/convnets/densenet.jl @@ -99,7 +99,8 @@ Create a DenseNet model - `reduction`: the factor by which the number of feature maps is scaled across each transition - `nclasses`: the number of output classes """ -function densenet(nblocks::Vector{<:Integer}; growth_rate::Integer = 32, reduction = 0.5, +function densenet(nblocks::AbstractVector{<:Integer}; growth_rate::Integer = 32, + reduction = 0.5, inchannels::Integer = 3, nclasses::Integer = 1000) return densenet(2 * growth_rate, [fill(growth_rate, n) for n in nblocks]; reduction, inchannels, nclasses) diff --git a/src/convnets/efficientnet.jl b/src/convnets/efficientnet.jl index 86ba9373f..91986fb92 100644 --- a/src/convnets/efficientnet.jl +++ b/src/convnets/efficientnet.jl @@ -22,8 +22,10 @@ Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). 
- `max_width`: maximum number of output channels before the fully connected classification blocks """ -function efficientnet(scalings, block_configs; max_width::Integer = 1280, - inchannels::Integer = 3, nclasses::Integer = 1000) +function efficientnet(scalings::NTuple{2, Real}, + block_configs::AbstractVector{NTuple{6, Int}}; + max_width::Integer = 1280, inchannels::Integer = 3, + nclasses::Integer = 1000) wscale, dscale = scalings scalew(w) = wscale ≈ 1 ? w : ceil(Int64, wscale * w) scaled(d) = dscale ≈ 1 ? d : ceil(Int64, dscale * d) @@ -83,61 +85,32 @@ const EFFICIENTNET_GLOBAL_CONFIGS = Dict(:b0 => (224, (1.0, 1.0)), :b8 => (672, (2.2, 3.6))) """ - EfficientNet(scalings, block_configs; max_width::Integer = 1280, - inchannels::Integer = 3, nclasses::Integer = 1000) + EfficientNet(config::Symbol; pretrain::Bool = false) Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). See also [`efficientnet`](#). # Arguments - - `scalings`: global width and depth scaling (given as a tuple) - - - `block_configs`: configuration for each inverted residual block, - given as a vector of tuples with elements: - - + `n`: number of block repetitions (will be scaled by global depth scaling) - + `k`: kernel size - + `s`: kernel stride - + `e`: expansion ratio - + `i`: block input channels (will be scaled by global width scaling) - + `o`: block output channels (will be scaled by global width scaling) - - `inchannels`: number of input channels - - `nclasses`: number of output classes - - `max_width`: maximum number of output channels before the fully connected - classification blocks + - `config`: name of default configuration + (can be `:b0`, `:b1`, `:b2`, `:b3`, `:b4`, `:b5`, `:b6`, `:b7`, `:b8`) + - `pretrain`: set to `true` to load the pre-trained weights for ImageNet """ struct EfficientNet layers::Any end @functor EfficientNet -function EfficientNet(scalings, block_configs; max_width::Integer = 1280, - inchannels::Integer = 3, nclasses::Integer = 1000) - layers = efficientnet(scalings, block_configs; inchannels, nclasses, max_width) - return EfficientNet(layers) +function EfficientNet(config::Symbol; pretrain::Bool = false) + _checkconfig(config, keys(EFFICIENTNET_GLOBAL_CONFIGS)) + model = efficientnet(EFFICIENTNET_GLOBAL_CONFIGS[config][2], EFFICIENTNET_BLOCK_CONFIGS) + if pretrain + loadpretrain!(model, string("efficientnet-", config)) + end + return model end (m::EfficientNet)(x) = m.layers(x) backbone(m::EfficientNet) = m.layers[1] classifier(m::EfficientNet) = m.layers[2] - -""" - EfficientNet(name::Symbol; pretrain::Bool = false) - -Create an EfficientNet model ([reference](https://arxiv.org/abs/1905.11946v5)). -See also [`efficientnet`](#). 
- -# Arguments - - - `name`: name of default configuration - (can be `:b0`, `:b1`, `:b2`, `:b3`, `:b4`, `:b5`, `:b6`, `:b7`, `:b8`) - - `pretrain`: set to `true` to load the pre-trained weights for ImageNet -""" -function EfficientNet(name::Symbol; pretrain::Bool = false) - _checkconfig(name, keys(EFFICIENTNET_GLOBAL_CONFIGS)) - model = EfficientNet(EFFICIENTNET_GLOBAL_CONFIGS[name][2], EFFICIENTNET_BLOCK_CONFIGS) - pretrain && loadpretrain!(model, string("efficientnet-", name)) - return model -end diff --git a/src/convnets/inception/googlenet.jl b/src/convnets/inception/googlenet.jl index a72ba5e6c..11d4dd7d3 100644 --- a/src/convnets/inception/googlenet.jl +++ b/src/convnets/inception/googlenet.jl @@ -53,8 +53,7 @@ function googlenet(; inchannels::Integer = 3, nclasses::Integer = 1000) MaxPool((3, 3); stride = 2, pad = 1), _inceptionblock(832, 256, 160, 320, 32, 128, 128), _inceptionblock(832, 384, 192, 384, 48, 128, 128)) - classifier = create_classifier(1024, nclasses; dropout_rate = 0.4) - return Chain(backbone, classifier) + return Chain(backbone, create_classifier(1024, nclasses; dropout_rate = 0.4)) end """ diff --git a/src/convnets/inception/inceptionresnetv2.jl b/src/convnets/inception/inceptionresnetv2.jl index 96b391b65..c2855191b 100644 --- a/src/convnets/inception/inceptionresnetv2.jl +++ b/src/convnets/inception/inceptionresnetv2.jl @@ -92,8 +92,7 @@ function inceptionresnetv2(; inchannels::Integer = 3, dropout_rate = 0.0, [block8(0.20f0) for _ in 1:9]..., block8(; activation = relu), conv_norm((1, 1), 2080, 1536)...) - classifier = create_classifier(1536, nclasses; dropout_rate) - return Chain(backbone, classifier) + return Chain(backbone, create_classifier(1536, nclasses; dropout_rate)) end """ diff --git a/src/convnets/inception/inceptionv3.jl b/src/convnets/inception/inceptionv3.jl index 8a5e19849..e5083feb5 100644 --- a/src/convnets/inception/inceptionv3.jl +++ b/src/convnets/inception/inceptionv3.jl @@ -154,8 +154,7 @@ function inceptionv3(; inchannels::Integer = 3, nclasses::Integer = 1000) inceptionv3_d(768), inceptionv3_e(1280), inceptionv3_e(2048)) - classifier = create_classifier(2048, nclasses; dropout_rate = 0.2) - return Chain(backbone, classifier) + return Chain(backbone, create_classifier(2048, nclasses; dropout_rate = 0.2)) end """ diff --git a/src/convnets/inception/inceptionv4.jl b/src/convnets/inception/inceptionv4.jl index 8d4f00eb2..cd4971742 100644 --- a/src/convnets/inception/inceptionv4.jl +++ b/src/convnets/inception/inceptionv4.jl @@ -117,8 +117,7 @@ function inceptionv4(; dropout_rate = 0.0, inchannels::Integer = 3, inceptionv4_c(), inceptionv4_c(), inceptionv4_c()) - classifier = create_classifier(1536, nclasses; dropout_rate) - return Chain(backbone, classifier) + return Chain(backbone, create_classifier(1536, nclasses; dropout_rate)) end """ diff --git a/src/convnets/inception/xception.jl b/src/convnets/inception/xception.jl index 71a4efc15..1c97daddc 100644 --- a/src/convnets/inception/xception.jl +++ b/src/convnets/inception/xception.jl @@ -45,15 +45,15 @@ function xception_block(inchannels::Integer, outchannels::Integer, nrepeats::Int end """ - xception(; inchannels::Integer = 3, dropout_rate = 0.0, nclasses::Integer = 1000) + xception(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) Creates an Xception model. ([reference](https://arxiv.org/abs/1610.02357)) # Arguments - - `inchannels`: number of input channels. - `dropout_rate`: rate of dropout in classifier head. + - `inchannels`: number of input channels. 
- `nclasses`: the number of output classes. """ function xception(; dropout_rate = 0.0, inchannels::Integer = 3, nclasses::Integer = 1000) diff --git a/src/convnets/mobilenet/mobilenetv1.jl b/src/convnets/mobilenet/mobilenetv1.jl index ca20b4a64..b6d9fe8ee 100644 --- a/src/convnets/mobilenet/mobilenetv1.jl +++ b/src/convnets/mobilenet/mobilenetv1.jl @@ -1,5 +1,5 @@ """ - mobilenetv1(width_mult::Number, config::Vector{<:Tuple}; activation = relu, + mobilenetv1(width_mult::Real, config::AbstractVector{<:Tuple}; activation = relu, inchannels::Integer = 3, nclasses::Integer = 1000) Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)). @@ -19,11 +19,11 @@ Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)). - `inchannels`: The number of input channels. The default value is 3. - `nclasses`: The number of output classes """ -function mobilenetv1(width_mult::Number, config::Vector{<:Tuple}; activation = relu, +function mobilenetv1(width_mult::Real, config::AbstractVector{<:Tuple}; activation = relu, inchannels::Integer = 3, nclasses::Integer = 1000) layers = [] for (dw, outch, stride, nrepeats) in config - outch = Int(outch * width_mult) + outch = floor(Int, outch * width_mult) for _ in 1:nrepeats layer = dw ? depthwise_sep_conv_norm((3, 3), inchannels, outch, activation; @@ -76,7 +76,7 @@ struct MobileNetv1 end @functor MobileNetv1 -function MobileNetv1(width_mult::Number = 1; pretrain::Bool = false, +function MobileNetv1(width_mult::Real = 1; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) layers = mobilenetv1(width_mult, MOBILENETV1_CONFIGS; inchannels, nclasses) if pretrain diff --git a/src/convnets/mobilenet/mobilenetv2.jl b/src/convnets/mobilenet/mobilenetv2.jl index 59e147829..84162e985 100644 --- a/src/convnets/mobilenet/mobilenetv2.jl +++ b/src/convnets/mobilenet/mobilenetv2.jl @@ -1,5 +1,5 @@ """ - mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; + mobilenetv2(width_mult::Real, configs::AbstractVector{<:Tuple}; max_width::Integer = 1280, inchannels::Integer = 3, nclasses::Integer = 1000) @@ -22,7 +22,7 @@ Create a MobileNetv2 model. - `max_width`: The maximum number of feature maps in any layer of the network - `nclasses`: The number of output classes """ -function mobilenetv2(width_mult::Number, configs::Vector{<:Tuple}; +function mobilenetv2(width_mult::Real, configs::AbstractVector{<:Tuple}; max_width::Integer = 1280, inchannels::Integer = 3, nclasses::Integer = 1000) divisor = width_mult == 0.1 ? 4 : 8 @@ -83,10 +83,9 @@ struct MobileNetv2 end @functor MobileNetv2 -function MobileNetv2(width_mult::Number = 1; pretrain::Bool = false, +function MobileNetv2(width_mult::Real = 1; pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) layers = mobilenetv2(width_mult, MOBILENETV2_CONFIGS; inchannels, nclasses) - pretrain && loadpretrain!(layers, string("MobileNetv2")) if pretrain loadpretrain!(layers, string("MobileNetv2")) end diff --git a/src/convnets/mobilenet/mobilenetv3.jl b/src/convnets/mobilenet/mobilenetv3.jl index 1c5e5825b..7d06ab14d 100644 --- a/src/convnets/mobilenet/mobilenetv3.jl +++ b/src/convnets/mobilenet/mobilenetv3.jl @@ -1,5 +1,5 @@ """ - mobilenetv3(width_mult::Number, configs::Vector{<:Tuple}; + mobilenetv3(width_mult::Real, configs::AbstractVector{<:Tuple}; max_width::Integer = 1024, inchannels::Integer = 3, nclasses::Integer = 1000) @@ -24,7 +24,7 @@ Create a MobileNetv3 model. 
- `max_width`: The maximum number of feature maps in any layer of the network - `nclasses`: the number of output classes """ -function mobilenetv3(width_mult::Number, configs::Vector{<:Tuple}; +function mobilenetv3(width_mult::Real, configs::AbstractVector{<:Tuple}; max_width::Integer = 1024, inchannels::Integer = 3, nclasses::Integer = 1000) # building first layer @@ -91,7 +91,7 @@ const MOBILENETV3_CONFIGS = Dict(:small => [ ]) """ - MobileNetv3(mode::Symbol; width_mult::Number = 1, pretrain::Bool = false, + MobileNetv3(config::Symbol; width_mult::Real = 1, pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) Create a MobileNetv3 model with the specified configuration. @@ -100,7 +100,7 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet. # Arguments - - `mode`: :small or :large for the size of the model (see paper). + - `config`: :small or :large for the size of the model (see paper). - `width_mult`: Controls the number of output feature maps in each block (with 1.0 being the default in the paper; this is usually a value between 0.1 and 1.4) @@ -115,14 +115,14 @@ struct MobileNetv3 end @functor MobileNetv3 -function MobileNetv3(mode::Symbol; width_mult::Number = 1, pretrain::Bool = false, +function MobileNetv3(config::Symbol; width_mult::Real = 1, pretrain::Bool = false, inchannels::Integer = 3, nclasses::Integer = 1000) - _checkconfig(mode, [:small, :large]) - max_width = (mode == :large) ? 1280 : 1024 - layers = mobilenetv3(width_mult, MOBILENETV3_CONFIGS[mode]; inchannels, max_width, + _checkconfig(config, [:small, :large]) + max_width = (config == :large) ? 1280 : 1024 + layers = mobilenetv3(width_mult, MOBILENETV3_CONFIGS[config]; max_width, inchannels, nclasses) if pretrain - loadpretrain!(layers, string("MobileNetv3", mode)) + loadpretrain!(layers, string("MobileNetv3", config)) end return MobileNetv3(layers) end diff --git a/src/convnets/resnets/core.jl b/src/convnets/resnets/core.jl index 940565f3a..79deadfb2 100644 --- a/src/convnets/resnets/core.jl +++ b/src/convnets/resnets/core.jl @@ -65,7 +65,7 @@ function bottleneck(inplanes::Integer, planes::Integer; stride::Integer, norm_layer = BatchNorm, revnorm::Bool = false, drop_block = identity, drop_path = identity, attn_fn = planes -> identity) - width = floor(Int, planes * (base_width / 64)) * cardinality + width = fld(planes * base_width, 64) * cardinality first_planes = width ÷ reduction_factor outplanes = planes * 4 conv_bn1 = conv_norm((1, 1), inplanes => first_planes, activation; norm_layer, revnorm, @@ -190,15 +190,16 @@ function resnet_stem(stem_type::Symbol = :default; inchannels::Integer = 3, return Chain(conv1, bn1, stempool) end -function resnet_planes(block_repeats::Vector{<:Integer}) +function resnet_planes(block_repeats::AbstractVector{<:Integer}) return Iterators.flatten((64 * 2^(stage_idx - 1) for _ in 1:stages) for (stage_idx, stages) in enumerate(block_repeats)) end -function basicblock_builder(block_repeats::Vector{<:Integer}; inplanes::Integer = 64, - reduction_factor::Integer = 1, expansion::Integer = 1, - norm_layer = BatchNorm, revnorm::Bool = false, - activation = relu, attn_fn = planes -> identity, +function basicblock_builder(block_repeats::AbstractVector{<:Integer}; + inplanes::Integer = 64, reduction_factor::Integer = 1, + expansion::Integer = 1, norm_layer = BatchNorm, + revnorm::Bool = false, activation = relu, + attn_fn = planes -> identity, drop_block_rate = 0.0, drop_path_rate = 0.0, stride_fn = resnet_stride, planes_fn = resnet_planes, 
downsample_tuple = (downsample_conv, downsample_identity)) @@ -228,11 +229,12 @@ function basicblock_builder(block_repeats::Vector{<:Integer}; inplanes::Integer return get_layers end -function bottleneck_builder(block_repeats::Vector{<:Integer}; inplanes::Integer = 64, - cardinality::Integer = 1, base_width::Integer = 64, - reduction_factor::Integer = 1, expansion::Integer = 4, - norm_layer = BatchNorm, revnorm::Bool = false, - activation = relu, attn_fn = planes -> identity, +function bottleneck_builder(block_repeats::AbstractVector{<:Integer}; + inplanes::Integer = 64, cardinality::Integer = 1, + base_width::Integer = 64, reduction_factor::Integer = 1, + expansion::Integer = 4, norm_layer = BatchNorm, + revnorm::Bool = false, activation = relu, + attn_fn = planes -> identity, drop_block_rate = 0.0, drop_path_rate = 0.0, stride_fn = resnet_stride, planes_fn = resnet_planes, downsample_tuple = (downsample_conv, downsample_identity)) @@ -265,7 +267,7 @@ function bottleneck_builder(block_repeats::Vector{<:Integer}; inplanes::Integer return get_layers end -function resnet_stages(get_layers, block_repeats::Vector{<:Integer}, connection) +function resnet_stages(get_layers, block_repeats::AbstractVector{<:Integer}, connection) # Construct each stage stages = [] for (stage_idx, num_blocks) in enumerate(block_repeats) @@ -277,7 +279,8 @@ function resnet_stages(get_layers, block_repeats::Vector{<:Integer}, connection) return Chain(stages...) end -function resnet(img_dims, stem, get_layers, block_repeats::Vector{<:Integer}, connection, +function resnet(img_dims, stem, get_layers, block_repeats::AbstractVector{<:Integer}, + connection, classifier_fn) # Build stages of the ResNet stage_blocks = resnet_stages(get_layers, block_repeats, connection) @@ -288,7 +291,7 @@ function resnet(img_dims, stem, get_layers, block_repeats::Vector{<:Integer}, co return Chain(backbone, classifier) end -function resnet(block_type::Symbol, block_repeats::Vector{<:Integer}; +function resnet(block_type::Symbol, block_repeats::AbstractVector{<:Integer}; downsample_opt::NTuple{2, Any} = (downsample_conv, downsample_identity), cardinality::Integer = 1, base_width::Integer = 64, inplanes::Integer = 64, reduction_factor::Integer = 1, imsize::Dims{2} = (256, 256), diff --git a/src/convnets/vgg.jl b/src/convnets/vgg.jl index e685620a3..de232d9a3 100644 --- a/src/convnets/vgg.jl +++ b/src/convnets/vgg.jl @@ -40,7 +40,7 @@ Create VGG convolution layers - `batchnorm`: set to `true` to include batch normalization after each convolution - `inchannels`: number of input channels """ -function vgg_convolutional_layers(config::Vector{<:Tuple}, batchnorm::Bool, +function vgg_convolutional_layers(config::AbstractVector{<:Tuple}, batchnorm::Bool, inchannels::Integer) layers = [] ifilters = inchannels @@ -69,7 +69,7 @@ Create VGG classifier (fully connected) layers function vgg_classifier_layers(imsize::NTuple{3, <:Integer}, nclasses::Integer, fcsize::Integer, dropout_rate) return Chain(MLUtils.flatten, - Dense(Int(prod(imsize)), fcsize, relu), + Dense(prod(imsize), fcsize, relu), Dropout(dropout_rate), Dense(fcsize, fcsize, relu), Dropout(dropout_rate), @@ -107,10 +107,7 @@ const VGG_CONV_CONFIGS = Dict(:A => [(64, 1), (128, 1), (256, 2), (512, 2), (512 :D => [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)], :E => [(64, 2), (128, 2), (256, 4), (512, 4), (512, 4)]) -const VGG_CONFIGS = Dict(11 => :A, - 13 => :B, - 16 => :D, - 19 => :E) +const VGG_CONFIGS = Dict(11 => :A, 13 => :B, 16 => :D, 19 => :E) """ VGG(imsize::Dims{2}; config, 
inchannels, batchnorm = false, nclasses, fcsize, dropout_rate) diff --git a/src/layers/attention.jl b/src/layers/attention.jl index e2276aa01..b8fd38165 100644 --- a/src/layers/attention.jl +++ b/src/layers/attention.jl @@ -1,5 +1,6 @@ """ - MHAttention(planes::Integer, nheads::Integer = 8; qkv_bias::Bool = false, attn_dropout_rate = 0., proj_dropout_rate = 0.) + MHAttention(planes::Integer, nheads::Integer = 8; qkv_bias::Bool = false, + attn_dropout_rate = 0., proj_dropout_rate = 0.) Multi-head self-attention layer. diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 75b40708c..c355eac2f 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,7 +1,11 @@ """ - conv_norm(kernel_size, inplanes::Int, outplanes::Int, activation = relu; - norm_layer = BatchNorm, revnorm = false, preact = false, use_norm = true, - stride = 1, pad = 0, dilation = 1, groups = 1, [bias, weight, init]) + conv_norm(kernel_size, inplanes::Integer, outplanes::Integer, activation = relu; + norm_layer = BatchNorm, revnorm::Bool = false, preact::Bool = false, + use_norm::Bool = true, stride::Integer = 1, pad::Integer = 0, + dilation::Integer = 1, groups::Integer = 1, [bias, weight, init]) + + conv_norm(kernel_size, inplanes => outplanes, activation = identity; + kwargs...) Create a convolution + batch normalization pair with activation. @@ -59,17 +63,21 @@ function conv_norm(kernel_size, ch::Pair{<:Integer, <:Integer}, activation = ide end """ - depthwise_sep_conv_norm(kernel_size, inplanes, outplanes, activation = relu; - revnorm = false, use_norm = (true, true), - stride = 1, pad = 0, dilation = 1, [bias, weight, init]) + depthwise_sep_conv_norm(kernel_size, inplanes::Integer, outplanes::Integer, + activation = relu; norm_layer = BatchNorm, + revnorm::Bool = false, stride::Integer = 1, + use_norm::NTuple{2, Bool} = (true, true), + pad::Integer = 0, dilation::Integer = 1, [bias, weight, init]) Create a depthwise separable convolution chain as used in MobileNetv1. This is sequence of layers: - a `kernel_size` depthwise convolution from `inplanes => inplanes` - - a batch norm layer + `activation` (if `use_norm[1] == true`; otherwise `activation` is applied to the convolution output) + - a (batch) normalisation layer + `activation` (if `use_norm[1] == true`; otherwise + `activation` is applied to the convolution output) - a `kernel_size` convolution from `inplanes => outplanes` - - a batch norm layer + `activation` (if `use_norm[2] == true`; otherwise `activation` is applied to the convolution output) + - a (batch) normalisation layer + `activation` (if `use_norm[2] == true`; otherwise + `activation` is applied to the convolution output) See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1). @@ -80,7 +88,8 @@ See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1). - `outplanes`: number of output feature maps - `activation`: the activation function for the final layer - `revnorm`: set to `true` to place the batch norm before the convolution - - `use_norm`: a tuple of two booleans to specify whether to use normalization for the first and second convolution + - `use_norm`: a tuple of two booleans to specify whether to use normalization for the first and + second convolution - `stride`: stride of the first convolution kernel - `pad`: padding of the first convolution kernel - `dilation`: dilation of the first convolution kernel @@ -88,9 +97,8 @@ See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1). 
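
For intuition, the factorisation described above corresponds roughly to the usual
MobileNet-style pair of layers written out by hand in Flux (an editor's sketch with
made-up sizes, not the exact layers this helper returns):

    using Flux
    inplanes, outplanes = 32, 64
    depthwise = Chain(Conv((3, 3), inplanes => inplanes; pad = 1, groups = inplanes, bias = false),
                      BatchNorm(inplanes, relu))
    pointwise = Chain(Conv((1, 1), inplanes => outplanes; bias = false),
                      BatchNorm(outplanes, relu))
    block = Chain(depthwise, pointwise)
    size(block(rand(Float32, 56, 56, inplanes, 1)))  # (56, 56, 64, 1)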
""" function depthwise_sep_conv_norm(kernel_size, inplanes::Integer, outplanes::Integer, activation = relu; norm_layer = BatchNorm, - revnorm::Bool = false, - use_norm::NTuple{2, Bool} = (true, true), - stride::Integer = 1, kwargs...) + revnorm::Bool = false, stride::Integer = 1, + use_norm::NTuple{2, Bool} = (true, true), kwargs...) return vcat(conv_norm(kernel_size, inplanes, inplanes, activation; norm_layer, revnorm, use_norm = use_norm[1], stride, groups = inplanes, kwargs...), @@ -135,9 +143,9 @@ function invertedresidual(kernel_size, inplanes::Integer, hidden_planes::Integer end function invertedresidual(kernel_size, inplanes::Integer, outplanes::Integer, - activation = relu; stride::Integer, expansion, + activation = relu; stride::Integer, expansion::Real, reduction::Union{Nothing, Integer} = nothing) - hidden_planes = Int(inplanes * expansion) + hidden_planes = floor(Int, inplanes * expansion) return invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activation; stride, reduction) end diff --git a/src/layers/drop.jl b/src/layers/drop.jl index f823d5c22..31c06c07a 100644 --- a/src/layers/drop.jl +++ b/src/layers/drop.jl @@ -50,6 +50,23 @@ end # Dispatch for CPU dropblock_mask(rng, x, gamma, bs) = _dropblock_mask(rng, x, gamma, bs) +""" + DropBlock(drop_block_prob = 0.1, block_size = 7, gamma_scale = 1.0, + rng = rng_from_array()) + +The `DropBlock` layer. While training, it zeroes out continguous regions of +size `block_size` in the input. During inference, it simply returns the input `x`. +((reference)[https://arxiv.org/abs/1810.12890]) + +# Arguments + + - `drop_block_prob`: probability of dropping a block + - `block_size`: size of the block to drop + - `gamma_scale`: multiplicative factor for `gamma` used. For the calculation of gamma, + refer to [the paper](https://arxiv.org/abs/1810.12890). + - `rng`: can be used to pass in a custom RNG instead of the default. Custom RNGs are only + supported on the CPU. +""" mutable struct DropBlock{F, R <: AbstractRNG} drop_block_prob::F block_size::Integer @@ -84,23 +101,6 @@ function Flux.testmode!(m::DropBlock, mode = true) return (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m) end -""" - DropBlock(drop_block_prob = 0.1, block_size = 7, gamma_scale = 1.0, - rng = rng_from_array()) - -The `DropBlock` layer. While training, it zeroes out continguous regions of -size `block_size` in the input. During inference, it simply returns the input `x`. -((reference)[https://arxiv.org/abs/1810.12890]) - -# Arguments - - - `drop_block_prob`: probability of dropping a block - - `block_size`: size of the block to drop - - `gamma_scale`: multiplicative factor for `gamma` used. For the calculation of gamma, - refer to [the paper](https://arxiv.org/abs/1810.12890). - - `rng`: can be used to pass in a custom RNG instead of the default. Custom RNGs are only - supported on the CPU. -""" function DropBlock(drop_block_prob = 0.1, block_size::Integer = 7, gamma_scale = 1.0, rng = rng_from_array()) if drop_block_prob == 0.0 diff --git a/src/layers/embeddings.jl b/src/layers/embeddings.jl index 560ac074d..cb9b8378c 100644 --- a/src/layers/embeddings.jl +++ b/src/layers/embeddings.jl @@ -23,10 +23,8 @@ function PatchEmbedding(imsize::Dims{2} = (224, 224); inchannels::Integer = 3, norm_layer = planes -> identity, flatten::Bool = true) im_height, im_width = imsize patch_height, patch_width = patch_size - @assert (im_height % patch_height == 0) && (im_width % patch_width == 0) "Image dimensions must be divisible by the patch size." 
- return Chain(Conv(patch_size, inchannels => embedplanes; stride = patch_size), flatten ? _flatten_spatial : identity, norm_layer(embedplanes)) diff --git a/src/layers/pool.jl b/src/layers/pool.jl index 049c06451..60447ddea 100644 --- a/src/layers/pool.jl +++ b/src/layers/pool.jl @@ -4,12 +4,13 @@ A type of adaptive pooling layer which uses both mean and max pooling and combines them to produce a single output. Note that this is equivalent to -`Parallel(connection, AdaptiveMeanPool(output_size), AdaptiveMaxPool(output_size))` +`Parallel(connection, AdaptiveMeanPool(output_size), AdaptiveMaxPool(output_size))`. +When `connection` is not specified, it defaults to `+`. # Arguments - - `output_size`: The size of the output after pooling. - `connection`: The connection type to use. + - `output_size`: The size of the output after pooling. """ function AdaptiveMeanMaxPool(connection, output_size::Tuple = (1, 1)) return Parallel(connection, AdaptiveMeanPool(output_size), AdaptiveMaxPool(output_size)) diff --git a/src/mixers/core.jl b/src/mixers/core.jl index f08a5f5d5..875136b2e 100644 --- a/src/mixers/core.jl +++ b/src/mixers/core.jl @@ -1,7 +1,7 @@ """ mlpmixer(block, imsize::Dims{2} = (224, 224); inchannels::Integer = 3, norm_layer = LayerNorm, patch_size::Dims{2} = (16, 16), embedplanes = 512, drop_path_rate = 0., - depth = 12, nclasses::Integer = 1000, kwargs...) + depth::Integer = 12, nclasses::Integer = 1000, kwargs...) Creates a model with the MLPMixer architecture. ([reference](https://arxiv.org/pdf/2105.01601)). @@ -23,7 +23,8 @@ Creates a model with the MLPMixer architecture. """ function mlpmixer(block, imsize::Dims{2} = (224, 224); norm_layer = LayerNorm, patch_size::Dims{2} = (16, 16), embedplanes = 512, drop_path_rate = 0.0, - depth = 12, inchannels::Integer = 3, nclasses::Integer = 1000, kwargs...) + depth::Integer = 12, inchannels::Integer = 3, nclasses::Integer = 1000, + kwargs...) npatches = prod(imsize .÷ patch_size) dp_rates = linear_scheduler(drop_path_rate; depth) layers = Chain(PatchEmbedding(imsize; inchannels, patch_size, embedplanes), @@ -35,7 +36,7 @@ function mlpmixer(block, imsize::Dims{2} = (224, 224); norm_layer = LayerNorm, end # Configurations for MLPMixer models -const MIXER_CONFIGS = Dict(:small => Dict(:depth => 8, :planes => 512), - :base => Dict(:depth => 12, :planes => 768), - :large => Dict(:depth => 24, :planes => 1024), - :huge => Dict(:depth => 32, :planes => 1280)) +const MIXER_CONFIGS = Dict(:small => (depth = 8, embedplanes = 512), + :base => (depth = 12, embedplanes = 768), + :large => (depth = 24, embedplanes = 1024), + :huge => (depth = 32, embedplanes = 1280)) diff --git a/src/mixers/gmlp.jl b/src/mixers/gmlp.jl index df4a52b70..ab89baadc 100644 --- a/src/mixers/gmlp.jl +++ b/src/mixers/gmlp.jl @@ -63,7 +63,7 @@ Creates a feedforward block based on the gMLP model architecture described in th function spatial_gating_block(planes::Integer, npatches::Integer; mlp_ratio = 4.0, norm_layer = LayerNorm, mlp_layer = gated_mlp_block, dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) - channelplanes = Int(mlp_ratio * planes) + channelplanes = floor(Int, mlp_ratio * planes) sgu = inplanes -> SpatialGatingUnit(inplanes, npatches; norm_layer) return SkipConnection(Chain(norm_layer(planes), mlp_layer(sgu, planes, channelplanes; activation, @@ -72,7 +72,7 @@ function spatial_gating_block(planes::Integer, npatches::Integer; mlp_ratio = 4. 
end """ - gMLP(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + gMLP(config::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the gMLP architecture. @@ -80,7 +80,7 @@ Creates a model with the gMLP architecture. # Arguments - - `size`: the size of the model - one of `small`, `base`, `large` or `huge` + - `config`: the size of the model - one of `small`, `base`, `large` or `huge` - `patch_size`: the size of the patches - `imsize`: the size of the input image - `inchannels`: the number of input channels @@ -93,13 +93,11 @@ struct gMLP end @functor gMLP -function gMLP(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), +function gMLP(config::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), inchannels::Integer = 3, nclasses::Integer = 1000) - _checkconfig(size, keys(MIXER_CONFIGS)) - depth = MIXER_CONFIGS[size][:depth] - embedplanes = MIXER_CONFIGS[size][:planes] + _checkconfig(config, keys(MIXER_CONFIGS)) layers = mlpmixer(spatial_gating_block, imsize; mlp_layer = gated_mlp_block, patch_size, - embedplanes, depth, inchannels, nclasses) + MIXER_CONFIGS[config]..., inchannels, nclasses) return gMLP(layers) end diff --git a/src/mixers/mlpmixer.jl b/src/mixers/mlpmixer.jl index 90a6aaebb..b784a8f8e 100644 --- a/src/mixers/mlpmixer.jl +++ b/src/mixers/mlpmixer.jl @@ -20,7 +20,7 @@ Creates a feedforward block for the MLPMixer architecture. function mixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, mlp_ratio::NTuple{2, Number} = (0.5, 4.0), dropout_rate = 0.0, drop_path_rate = 0.0, activation = gelu) - tokenplanes, channelplanes = Int.(planes .* mlp_ratio) + tokenplanes, channelplanes = floor.(Int, planes .* mlp_ratio) return Chain(SkipConnection(Chain(LayerNorm(planes), swapdims((2, 1, 3)), mlp_layer(npatches, tokenplanes; activation, @@ -34,7 +34,7 @@ function mixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block, end """ - MLPMixer(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + MLPMixer(config::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the MLPMixer architecture. @@ -42,7 +42,7 @@ Creates a model with the MLPMixer architecture. 
# Arguments - - `size`: the size of the model - one of `small`, `base`, `large` or `huge` + - `config`: the size of the model - one of `small`, `base`, `large` or `huge` - `patch_size`: the size of the patches - `imsize`: the size of the input image - `drop_path_rate`: Stochastic depth rate @@ -56,13 +56,10 @@ struct MLPMixer end @functor MLPMixer -function MLPMixer(size::Symbol; imsize::Dims{2} = (224, 224), - patch_size::Dims{2} = (16, 16), +function MLPMixer(config::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), inchannels::Integer = 3, nclasses::Integer = 1000) - _checkconfig(size, keys(MIXER_CONFIGS)) - depth = MIXER_CONFIGS[size][:depth] - embedplanes = MIXER_CONFIGS[size][:planes] - layers = mlpmixer(mixerblock, imsize; patch_size, embedplanes, depth, inchannels, + _checkconfig(config, keys(MIXER_CONFIGS)) + layers = mlpmixer(mixerblock, imsize; patch_size, MIXER_CONFIGS[config]..., inchannels, nclasses) return MLPMixer(layers) end diff --git a/src/mixers/resmlp.jl b/src/mixers/resmlp.jl index f2c9ece15..21ad89d65 100644 --- a/src/mixers/resmlp.jl +++ b/src/mixers/resmlp.jl @@ -27,15 +27,14 @@ function resmixerblock(planes::Integer, npatches::Integer; mlp_layer = mlp_block LayerScale(planes, layerscale_init), DropPath(drop_path_rate)), +), SkipConnection(Chain(Flux.Scale(planes), - mlp_layer(planes, Int(mlp_ratio * planes); - dropout_rate, - activation), + mlp_layer(planes, floor(Int, mlp_ratio * planes); + dropout_rate, activation), LayerScale(planes, layerscale_init), DropPath(drop_path_rate)), +)) end """ - ResMLP(size::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), + ResMLP(config::Symbol; patch_size::Dims{2} = (16, 16), imsize::Dims{2} = (224, 224), inchannels::Integer = 3, nclasses::Integer = 1000) Creates a model with the ResMLP architecture. @@ -43,7 +42,7 @@ Creates a model with the ResMLP architecture. # Arguments - - `size`: the size of the model - one of `small`, `base`, `large` or `huge` + - `config`: the size of the model - one of `small`, `base`, `large` or `huge` - `patch_size`: the size of the patches - `imsize`: the size of the input image - `inchannels`: the number of input channels @@ -56,13 +55,12 @@ struct ResMLP end @functor ResMLP -function ResMLP(size::Symbol; imsize::Dims{2} = (224, 224), patch_size::Dims{2} = (16, 16), +function ResMLP(config::Symbol; imsize::Dims{2} = (224, 224), + patch_size::Dims{2} = (16, 16), inchannels::Integer = 3, nclasses::Integer = 1000) - _checkconfig(size, keys(MIXER_CONFIGS)) - depth = MIXER_CONFIGS[size][:depth] - embedplanes = MIXER_CONFIGS[size][:planes] - layers = mlpmixer(resmixerblock, imsize; mlp_ratio = 4.0, patch_size, embedplanes, - depth, inchannels, nclasses) + _checkconfig(config, keys(MIXER_CONFIGS)) + layers = mlpmixer(resmixerblock, imsize; mlp_ratio = 4.0, patch_size, + MIXER_CONFIGS[config]..., inchannels, nclasses) return ResMLP(layers) end diff --git a/src/utilities.jl b/src/utilities.jl index 981777228..359010cfe 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -67,7 +67,7 @@ end Returns the dropout rates for a given depth using the linear scaling rule. 
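
For example (an editor's illustration of the rule, matching the `LinRange` call in the
function body below):

    linear_scheduler(0.2; depth = 5)
    # ranges linearly from `start_value = 0.0` to 0.2 over 5 steps:
    # 0.0, 0.05, 0.1, 0.15, 0.2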
""" -function linear_scheduler(drop_rate = 0.0; depth, start_value = 0.0) +function linear_scheduler(drop_rate = 0.0; depth::Integer, start_value = 0.0) return LinRange(start_value, drop_rate, depth) end diff --git a/src/vit-based/vit.jl b/src/vit-based/vit.jl index 6f145a4bb..099d00639 100644 --- a/src/vit-based/vit.jl +++ b/src/vit-based/vit.jl @@ -76,7 +76,7 @@ const VIT_CONFIGS = Dict(:tiny => (depth = 12, embedplanes = 192, nheads = 3), mlp_ratio = 64 // 13)) """ - ViT(mode::Symbol = base; imsize::Dims{2} = (256, 256), inchannels::Integer = 3, + ViT(config::Symbol = base; imsize::Dims{2} = (256, 256), inchannels::Integer = 3, patch_size::Dims{2} = (16, 16), pool = :class, nclasses::Integer = 1000) Creates a Vision Transformer (ViT) model. @@ -84,7 +84,7 @@ Creates a Vision Transformer (ViT) model. # Arguments - - `mode`: the model configuration, one of + - `config`: the model configuration, one of `[:tiny, :small, :base, :large, :huge, :giant, :gigantic]` - `imsize`: image size - `inchannels`: number of input channels @@ -99,10 +99,10 @@ struct ViT end @functor ViT -function ViT(mode::Symbol; imsize::Dims{2} = (256, 256), patch_size::Dims{2} = (16, 16), +function ViT(config::Symbol; imsize::Dims{2} = (256, 256), patch_size::Dims{2} = (16, 16), inchannels::Integer = 3, nclasses::Integer = 1000) - _checkconfig(mode, keys(VIT_CONFIGS)) - layers = vit(imsize; inchannels, patch_size, nclasses, VIT_CONFIGS[mode]...) + _checkconfig(config, keys(VIT_CONFIGS)) + layers = vit(imsize; inchannels, patch_size, nclasses, VIT_CONFIGS[config]...) return ViT(layers) end diff --git a/test/convnets.jl b/test/convnets.jl index 35a745b87..c58cad7be 100644 --- a/test/convnets.jl +++ b/test/convnets.jl @@ -123,16 +123,16 @@ end end @testset "EfficientNet" begin - @testset "EfficientNet($name)" for name in [:b0, :b1, :b2, :b3, :b4, :b5] #:b6, :b7, :b8] + @testset "EfficientNet($config)" for config in [:b0, :b1, :b2, :b3, :b4, :b5] #:b6, :b7, :b8] # preferred image resolution scaling - r = Metalhead.EFFICIENTNET_GLOBAL_CONFIGS[name][1] + r = Metalhead.EFFICIENTNET_GLOBAL_CONFIGS[config][1] x = rand(Float32, r, r, 3, 1) - m = EfficientNet(name) + m = EfficientNet(config) @test size(m(x)) == (1000, 1) - if (EfficientNet, name) in PRETRAINED_MODELS - @test acctest(EfficientNet(name, pretrain = true)) + if (EfficientNet, config) in PRETRAINED_MODELS + @test acctest(EfficientNet(config, pretrain = true)) else - @test_throws ArgumentError EfficientNet(name, pretrain = true) + @test_throws ArgumentError EfficientNet(config, pretrain = true) end @test gradtest(m, x) _gc() @@ -249,13 +249,13 @@ end end _gc() @testset "MobileNetv3" verbose = true begin - @testset for mode in [:small, :large] - m = MobileNetv3(mode) + @testset for config in [:small, :large] + m = MobileNetv3(config) @test size(m(x_224)) == (1000, 1) - if (MobileNetv3, mode) in PRETRAINED_MODELS - @test acctest(MobileNetv3(mode; pretrain = true)) + if (MobileNetv3, config) in PRETRAINED_MODELS + @test acctest(MobileNetv3(config; pretrain = true)) else - @test_throws ArgumentError MobileNetv3(mode; pretrain = true) + @test_throws ArgumentError MobileNetv3(config; pretrain = true) end @test gradtest(m, x_224) _gc() @@ -264,8 +264,8 @@ end end @testset "ConvNeXt" verbose = true begin - @testset for mode in [:small, :base, :large, :tiny, :xlarge] - m = ConvNeXt(mode) + @testset for config in [:small, :base, :large, :tiny, :xlarge] + m = ConvNeXt(config) @test size(m(x_224)) == (1000, 1) @test gradtest(m, x_224) _gc() @@ -273,8 +273,8 @@ end end @testset 
"ConvMixer" verbose = true begin - @testset for mode in [:small, :base, :large] - m = ConvMixer(mode) + @testset for config in [:small, :base, :large] + m = ConvMixer(config) @test size(m(x_224)) == (1000, 1) @test gradtest(m, x_224) _gc() diff --git a/test/mixers.jl b/test/mixers.jl index 51cdd736e..2a5d9af70 100644 --- a/test/mixers.jl +++ b/test/mixers.jl @@ -1,6 +1,6 @@ @testset for model in [MLPMixer, ResMLP, gMLP] - @testset for mode in [:small, :base, :large] - m = model(mode) + @testset for config in [:small, :base, :large] + m = model(config) @test size(m(x_224)) == (1000, 1) @test gradtest(m, x_224) _gc() diff --git a/test/vits.jl b/test/vits.jl index fb9fd6b02..7561cfdb5 100644 --- a/test/vits.jl +++ b/test/vits.jl @@ -1,6 +1,6 @@ @testset "ViT" begin - for mode in [:tiny, :small, :base, :large, :huge] # :giant, :gigantic] - m = ViT(mode) + for config in [:tiny, :small, :base, :large, :huge] # :giant, :gigantic] + m = ViT(config) @test size(m(x_256)) == (1000, 1) @test gradtest(m, x_256) _gc()