FluxML · CarloLucibello · Apr 23, 2023 · Sep 7, 2022 · Sep 14, 2022 · Sep 14, 2022
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "Metalhead"
 uuid = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
-version = "0.8.0-DEV"
+version = "0.8.0"
 
 [deps]
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -5,4 +5,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
 LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
-OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
+Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,26 +1,41 @@
-using Documenter, Metalhead, Artifacts, LazyArtifacts, Images, OneHotArrays, DataAugmentation, Flux
+using Documenter, Metalhead, Artifacts, LazyArtifacts, Images, DataAugmentation, Flux
 
 DocMeta.setdocmeta!(Metalhead, :DocTestSetup, :(using Metalhead); recursive = true)
 
-makedocs(modules = [Metalhead, Artifacts, LazyArtifacts, Images, OneHotArrays, DataAugmentation, Flux],
+makedocs(; modules = [Metalhead, Artifacts, LazyArtifacts, Images, DataAugmentation, Flux],
          sitename = "Metalhead.jl",
          doctest = false,
          pages = ["Home" => "index.md",
-                  "Tutorials" => [
-                      "tutorials/quickstart.md",
-                   ],
-                  "Developer guide" => "contributing.md",
-                  "API reference" => [
-                      "api/reference.md",
-                   ],
-                 ],
-         format = Documenter.HTML(
-              canonical = "https://fluxml.ai/Metalhead.jl/stable/",
-            #   analytics = "UA-36890222-9",
-              assets = ["assets/flux.css"],
-              prettyurls = get(ENV, "CI", nothing) == "true"),
-        )
+             "Tutorials" => [
+                 "tutorials/quickstart.md",
+                 "tutorials/pretrained.md",
+             ],
+             "API reference" => [
+                "Convolutional Neural Networks" => [
+                    "api/others.md",
+                    "api/inception.md",
+                    "api/resnet.md",
+                    "api/densenet.md",
+                    "api/hybrid.md",
+                    "api/layers.md",
+                ],
+                "Mixers" => [
+                    "api/mixers.md",
+                ],
+                "Vision Transformers" => [
+                    "api/vit.md",
+                ],
+                "api/utilities.md"
+             ],
+             "How To" => [
+                 "howto/resnet.md",
+             ],
+             "Contributing to Metalhead" => "contributing.md",
+         ],
+         format = Documenter.HTML(; canonical = "https://fluxml.ai/Metalhead.jl/stable/",
+                                  #   analytics = "UA-36890222-9",
+                                  assets = ["assets/flux.css"],
+                                  prettyurls = get(ENV, "CI", nothing) == "true"))
 
-deploydocs(repo = "github.com/FluxML/Metalhead.jl.git",
-           target = "build",
+deploydocs(; repo = "github.com/FluxML/Metalhead.jl.git", target = "build",
            push_preview = true)
diff --git a/docs/src/api/densenet.md b/docs/src/api/densenet.md
@@ -0,0 +1,15 @@
+# DenseNet
+
+This is the API reference for the DenseNet model present in Metalhead.jl.
+
+## The higher-level model
+
+```@docs
+DenseNet
+```
+
+## The core function
+
+```@docs
+Metalhead.densenet
+```
diff --git a/docs/src/api/hybrid.md b/docs/src/api/hybrid.md
@@ -0,0 +1,17 @@
+# Hybrid CNN architectures
+
+These models are hybrid CNN architectures that borrow certain ideas from vision transformer models.
+
+## The higher-level model constructors
+
+```@docs
+ConvMixer
+ConvNeXt
+```
+
+## The mid-level functions
+
+```@docs
+Metalhead.convmixer
+Metalhead.convnext
+```
diff --git a/docs/src/api/inception.md b/docs/src/api/inception.md
@@ -0,0 +1,23 @@
+# Inception models
+
+This is the API reference for the Inception family of models supported by Metalhead.jl.
+
+## The higher-level model constructors
+
+```@docs
+GoogLeNet
+Inceptionv3
+Inceptionv4
+InceptionResNetv2
+Xception
+```
+
+## The mid-level functions
+
+```@docs
+Metalhead.googlenet
+Metalhead.inceptionv3
+Metalhead.inceptionv4
+Metalhead.inceptionresnetv2
+Metalhead.xception
+```
diff --git a/docs/src/api/layers.md b/docs/src/api/layers.md
@@ -0,0 +1,18 @@
+# Layers
+
+Metalhead also defines a module called `Layers` which contains some more modern layers that are not available in Flux. To use the functions defined in the `Layers` module, you need to import it.
+
+```julia
+using Metalhead: Layers
+```
+
+This page contains the API reference for the `Layers` module.
+
+!!! warning
+
+    The `Layers` module is still a work in progress. While we will endeavour to keep the API stable, we cannot guarantee that it will not change in the future. If you find any of the functions in this 
+    module do not work as expected, please open an issue on GitHub.
+
+```@autodocs
+Modules = [Metalhead.Layers]
+```
diff --git a/docs/src/api/mixers.md b/docs/src/api/mixers.md
@@ -0,0 +1,26 @@
+# MLPMixer-like models
+
+This is the API reference for the MLPMixer-like models supported by Metalhead.jl.
+
+## The higher-level model constructors
+
+```@docs
+MLPMixer
+ResMLP
+gMLP
+```
+
+## The core MLPMixer function
+
+```@docs
+Metalhead.mlpmixer
+```
+
+## The block functions
+
+```@docs
+Metalhead.mixerblock
+Metalhead.resmixerblock
+Metalhead.SpatialGatingUnit
+Metalhead.spatialgatingblock
+```
diff --git a/docs/src/api/others.md b/docs/src/api/others.md
@@ -0,0 +1,19 @@
+# Other models
+
+This is the API reference for some of the other models supported by Metalhead.jl that do not fit into the other categories.
+
+## The higher-level model constructors
+
+```@docs
+AlexNet
+VGG
+SqueezeNet
+```
+
+## The mid-level functions
+
+```@docs
+Metalhead.alexnet
+Metalhead.vgg
+Metalhead.squeezenet
+```
diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md
diff --git a/docs/src/api/resnet.md b/docs/src/api/resnet.md
@@ -0,0 +1,61 @@
+# ResNet-like models
+
+This is the API reference for the ResNet inspired model structures present in Metalhead.jl.
+
+## The higher-level model constructors
+
+```@docs
+ResNet
+WideResNet
+ResNeXt
+SEResNet
+SEResNeXt
+Res2Net
+Res2NeXt
+```
+
+## The mid-level function
+
+```@docs
+Metalhead.resnet
+```
+
+## Lower-level functions and builders
+
+### Block functions
+
+```@docs
+Metalhead.basicblock
+Metalhead.bottleneck
+Metalhead.bottle2neck
+```
+
+### Downsampling functions
+
+```@docs
+Metalhead.downsample_identity
+Metalhead.downsample_conv
+Metalhead.downsample_pool
+```
+
+### Block builders
+
+```@docs
+Metalhead.basicblock_builder
+Metalhead.bottleneck_builder
+Metalhead.bottle2neck_builder
+```
+
+### Generic ResNet model builder
+
+```@docs
+Metalhead.build_resnet
+```
+
+## Utility callbacks
+
+```@docs
+Metalhead.resnet_planes
+Metalhead.resnet_stride
+Metalhead.resnet_stem
+```
diff --git a/docs/src/api/utilities.md b/docs/src/api/utilities.md
@@ -0,0 +1,10 @@
+# Utilities
+
+Metalhead provides some utility functions for making it easier to work with the models inside the library or to build new ones. The API reference for these is documented below.
+
+## `backbone` and `classifier`
+
+```@docs
+backbone
+classifier
+```
diff --git a/docs/src/api/vit.md b/docs/src/api/vit.md
@@ -0,0 +1,15 @@
+# Vision Transformer models
+
+This is the API reference for the Vision Transformer models supported by Metalhead.jl.
+
+## The higher-level model constructors
+
+```@docs
+ViT
+```
+
+## The mid-level functions
+
+```@docs
+Metalhead.vit
+```
diff --git a/docs/src/contributing.md b/docs/src/contributing.md
@@ -1,4 +1,4 @@
-# Contributing to Metalhead.jl
+# [Contribute to Metalhead.jl](@id contributing)
 
 We welcome contributions from anyone to Metalhead.jl! Thank you for taking the time to make our ecosystem better.
 
@@ -16,7 +16,7 @@ To add a new model architecture to Metalhead.jl, you can [open a PR](https://git
 
 - reuse layers from Flux as much as possible (e.g. use `Parallel` before defining a `Bottleneck` struct)
 - adhere as closely as possible to a reference such as a published paper (i.e. the structure of your model should follow intuitively from the paper)
-- use generic functional builders (e.g. [`Metalhead.resnet`](@ref) is the core function that builds "ResNet-like" models)
+- use generic functional builders (e.g. [`Metalhead.resnet`](@ref) is the underlying function that builds "ResNet-like" models)
 - use multiple dispatch to add convenience constructors that wrap your functional builder
 
 When in doubt, just open a PR! We are more than happy to help review your code to help it align with the rest of the library. After adding a model, you might consider adding some pre-trained weights (see below).
@@ -28,10 +28,11 @@ To add pre-trained weights for an existing model or new model, you can [open a P
 All Metalhead.jl model artifacts are hosted using HuggingFace. You can find the FluxML account [here](https://huggingface.co/FluxML). This [documentation from HuggingFace](https://huggingface.co/docs/hub/models) will provide you with an introduction to their ModelHub. In short, the Model Hub is a collection of Git repositories, similar to Julia packages on GitHub. This means you can [make a pull request to our HuggingFace repositories](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) to upload updated weight artifacts just like you would make a PR on GitHub to upload code.
 
 1. Train your model or port the weights from another framework.
-2. Save the model using [BSON.jl](https://github.com/JuliaIO/BSON.jl) with `BSON.@save "modelname.bson" model`. It is important that your model is saved under the key `model`.
+2. Save the model using [BSON.jl](https://github.com/JuliaIO/BSON.jl) with `BSON.@save "modelname.bson" model`. It is important that your model is saved under the key `model`. Note that, due to the way this
+   process works, the model must be saved using the LTS version of Julia (currently 1.6) to maintain compatibility with different Julia versions.
 3. Compress the saved model as a tarball using `tar -cvzf modelname.tar.gz modelname.bson`.
 4. Obtain the SHAs (see the [Pkg docs](https://pkgdocs.julialang.org/v1/artifacts/#Basic-Usage)). Edit the `Artifacts.toml` file in the Metalhead.jl repository and add entry for your model. You can leave the URL empty for now.
-5. Open a PR on Metalhead.jl. Be sure to ping a maintainer (e.g. `@darsnack`) to let us know that you are adding a pre-trained weight. We will create a model repository on HuggingFace if it does not already exist.
+5. Open a PR on Metalhead.jl. Be sure to ping a maintainer (e.g. `@darsnack` or `@theabhirath`) to let us know that you are adding a pre-trained weight. We will create a model repository on HuggingFace if it does not already exist.
 6. Open a PR to the [corresponding HuggingFace repo](https://huggingface.co/FluxML). Do this by going to the "Community" tab in the HuggingFace repository. PRs and discussions are shown as the same thing in the HuggingFace web app. You can use your local Git program to make clone the repo and make PRs if you wish. Check out the [guide on PRs to HuggingFace](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) for more information.
 7. Copy the download URL for the model file that you added to HuggingFace. Make sure to grab the URL for a specific commit and not for the `main` branch.
 8. Update your Metalhead.jl PR by adding the URL to the Artifacts.toml.

diff --git a/docs/src/howto/resnet.md b/docs/src/howto/resnet.md
@@ -0,0 +1,21 @@
+# Using the ResNet model family in Metalhead.jl
+
+ResNets are one of the most common convolutional neural network (CNN) models used today. Originally proposed by He et al. in [**Deep Residual Learning for Image Recognition**](https://arxiv.org/abs/1512.03385), they use a residual structure to learn identity mappings, which strengthens gradient propagation, thereby helping to prevent the vanishing gradient problem and enabling the truly deep neural networks used today.
+
+Many variants on the original ResNet structure have since become widely used, such as [Wide-ResNet](https://arxiv.org/abs/1605.07146), [ResNeXt](https://arxiv.org/abs/1611.05431v2), [SE-ResNet](https://arxiv.org/abs/1709.01507) and [Res2Net](https://arxiv.org/abs/1904.01169). Apart from suggesting modifications to the structure of the residual block, papers have also suggested modifying the stem of the network, adding newer regularisation options in the form of stochastic depth and DropBlock, and changing the downsampling path for the blocks to improve performance.
+
+Metalhead provides an extensible, hackable yet powerful interface for working with ResNets. It offers built-in toggles for options commonly used in papers and other deep learning libraries, while also allowing users to easily build custom model structures if they want.
+
+## Pre-trained models
+
+Metalhead provides a variety of pretrained models in the ResNet family to allow users to get started quickly with tasks like transfer learning. Pretrained models for [`ResNet`](@ref) with depths 18, 34, 50, 101 and 152 are supported, as are those for [`WideResNet`](@ref) with depths 50 and 101. [`ResNeXt`](@ref) also supports some configurations of pretrained models; to learn more, check out the documentation for the model.
+
+Loading a pretrained model is as easy as setting the `pretrain` keyword to `true` when constructing the model. For example, to load a pretrained `ResNet` with depth 50, you can do the following:
+
+```julia
+using Metalhead
+
+model = ResNet(50; pretrain=true)
+```
+
+To learn more about using pretrained models, check out the [pretrained models guide](@ref pretrained).