diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a8f5edf26d2b3..cee0b2a2bd7b2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -493,7 +493,8 @@ jobs: - name: Install dependencies for documentation generation run: | # pandoc is required to generate PySpark APIs as well in nbsphinx. - apt-get install -y libcurl4-openssl-dev pandoc + apt-get install -y libcurl4-openssl-dev pandoc libfontconfig1-dev libharfbuzz-dev \ + libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # Jinja2 3.0.0+ causes error when building with Sphinx. @@ -503,6 +504,8 @@ jobs: apt-get update -y apt-get install -y ruby ruby-dev Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" + Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" + Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" gem install bundler cd docs bundle install diff --git a/R/create-docs.sh b/R/create-docs.sh index ce0fb48b9ff27..1774d5870de5a 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -24,6 +24,8 @@ # $SPARK_HOME/R/pkg/html # The vignettes can be found in # $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html +# pkgdown website can be found in +# $SPARK_HOME/R/pkg/docs set -o pipefail set -e @@ -51,6 +53,18 @@ pushd pkg/html "$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); knitr::knit_rd("SparkR", links = tools::findHTMLlinks(file.path(libDir, "SparkR")))' + +# Determine Spark(R) version +SPARK_VERSION=$(grep -oP "(?<=Version:\ ).*" ../DESCRIPTION) + +# Update url +sed "s/{SPARK_VERSION}/$SPARK_VERSION/" ../pkgdown/_pkgdown_template.yml > ../_pkgdown.yml + +"$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); pkgdown::build_site("..")' + +# Clean temporary config +rm ../_pkgdown.yml + popd popd diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore index 18b2db69db8f1..2bfcda66f3e0d 100644 --- a/R/pkg/.Rbuildignore +++ b/R/pkg/.Rbuildignore @@ -7,3 +7,6 @@ ^src-native$ ^html$ ^tests/fulltests/* +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/R/pkg/.gitignore b/R/pkg/.gitignore new file mode 100644 index 0000000000000..d8f8d46921aa8 --- /dev/null +++ b/R/pkg/.gitignore @@ -0,0 +1 @@ +docs diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index e37509ad488c0..db616626f8fc8 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -890,10 +890,9 @@ setMethod("toJSON", #' save mode (it is 'error' by default) #' @param ... additional argument(s) passed to the method. #' You can find the JSON-specific options for writing JSON files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option}{ -#' Data Source Option} in the version you use. -#' +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @family SparkDataFrame functions #' @rdname write.json #' @name write.json @@ -925,10 +924,9 @@ setMethod("write.json", #' save mode (it is 'error' by default) #' @param ... additional argument(s) passed to the method. 
#' You can find the ORC-specific options for writing ORC files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-orc.html#data-source-option}{ -#' Data Source Option} in the version you use. -#' +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-orc.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @family SparkDataFrame functions #' @aliases write.orc,SparkDataFrame,character-method #' @rdname write.orc @@ -960,10 +958,9 @@ setMethod("write.orc", #' save mode (it is 'error' by default) #' @param ... additional argument(s) passed to the method. #' You can find the Parquet-specific options for writing Parquet files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option -#' }{Data Source Option} in the version you use. -#' +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @family SparkDataFrame functions #' @rdname write.parquet #' @name write.parquet @@ -996,10 +993,9 @@ setMethod("write.parquet", #' save mode (it is 'error' by default) #' @param ... additional argument(s) passed to the method. #' You can find the text-specific options for writing text files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-text.html#data-source-option}{ -#' Data Source Option} in the version you use. -#' +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-text.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @family SparkDataFrame functions #' @aliases write.text,SparkDataFrame,character-method #' @rdname write.text @@ -3912,8 +3908,7 @@ setMethod("isStreaming", #' @aliases write.stream,SparkDataFrame-method #' @rdname write.stream #' @name write.stream -#' @examples -#'\dontrun{ +#' @examples \dontrun{ #' sparkR.session() #' df <- read.stream("socket", host = "localhost", port = 9999) #' isStreaming(df) diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index d86e38bf4b3b8..5adebade8b7eb 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -382,9 +382,9 @@ setMethod("toDF", signature(x = "RDD"), #' @param path Path of file to read. A vector of multiple paths is allowed. #' @param ... additional external data source specific named properties. #' You can find the JSON-specific options for reading JSON files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option}{ -#' Data Source Option} in the version you use. +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @return SparkDataFrame #' @rdname read.json #' @examples @@ -414,9 +414,9 @@ read.json <- function(path, ...) { #' @param path Path of file to read. #' @param ... additional external data source specific named properties. #' You can find the ORC-specific options for reading ORC files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-orc.html#data-source-option}{ -#' Data Source Option} in the version you use. +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-orc.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @return SparkDataFrame #' @rdname read.orc #' @name read.orc @@ -439,9 +439,9 @@ read.orc <- function(path, ...) 
{ #' @param path path of file to read. A vector of multiple paths is allowed. #' @param ... additional data source specific named properties. #' You can find the Parquet-specific options for reading Parquet files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option -#' }{Data Source Option} in the version you use. +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @return SparkDataFrame #' @rdname read.parquet #' @name read.parquet @@ -468,9 +468,9 @@ read.parquet <- function(path, ...) { #' @param path Path of file to read. A vector of multiple paths is allowed. #' @param ... additional external data source specific named properties. #' You can find the text-specific options for reading text files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-text.html#data-source-option}{ -#' Data Source Option} in the version you use. +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-text.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' @return SparkDataFrame #' @rdname read.text #' @examples @@ -619,8 +619,9 @@ loadDF <- function(path = NULL, source = NULL, schema = NULL, ...) { #' #' Additional JDBC database connection properties can be set (...) #' You can find the JDBC-specific option and parameter documentation for reading tables via JDBC in -#' \url{https://spark.apache.org/docs/latest/sql-data-sources-jdbc.html#data-source-option}{ -#' Data Source Option} in the version you use. +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-jdbc.html#data-source-option}{Data Source Option} in the version you use. +# nolint end #' #' Only one of partitionColumn or predicates should be set. Partitions of the table will be #' retrieved in parallel based on the \code{numPartitions} or by the predicates. diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 48d4fe81c8c87..1377f0daa7360 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -264,18 +264,20 @@ NULL #' additional named properties to control how it is converted and accepts the #' same options as the JSON data source. #' You can find the JSON-specific options for reading/writing JSON files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option}{ -#' Data Source Option} in the version you use. +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option}{Data Source Option} +# nolint end +#' in the version you use. #' \item \code{to_json}: it supports the "pretty" option which enables pretty #' JSON generation. #' \item \code{to_csv}, \code{from_csv} and \code{schema_of_csv}: this contains #' additional named properties to control how it is converted and accepts the #' same options as the CSV data source. #' You can find the CSV-specific options for reading/writing CSV files in -#' \url{ -#' https://spark.apache.org/docs/latest/sql-data-sources-csv.html#data-source-option}{ -#' Data Source Option} in the version you use. +# nolint start +#' \url{https://spark.apache.org/docs/latest/sql-data-sources-csv.html#data-source-option}{Data Source Option} +# nolint end +#' in the version you use. #' \item \code{arrays_zip}, this contains additional Columns of arrays to be merged. #' \item \code{map_concat}, this contains additional Columns of maps to be unioned. 
#' } @@ -3816,6 +3818,7 @@ setMethod("row_number", #' Column, for example \code{unresolved_named_lambda_var("a", "b", "c")} #' yields unresolved \code{a.b.c} #' @return Column object wrapping JVM UnresolvedNamedLambdaVariable +#' @keywords internal unresolved_named_lambda_var <- function(...) { jc <- newJObject( "org.apache.spark.sql.Column", @@ -3839,6 +3842,7 @@ unresolved_named_lambda_var <- function(...) { #' @param fun R \code{function} (unary, binary or ternary) #' that transforms \code{Columns} into a \code{Column} #' @return JVM \code{LambdaFunction} object +#' @keywords internal create_lambda <- function(fun) { as_jexpr <- function(x) callJMethod(x@jc, "expr") @@ -3887,6 +3891,7 @@ create_lambda <- function(fun) { #' @param cols list of character or Column objects #' @param funs list of named list(fun = ..., expected_narg = ...) #' @return a \code{Column} representing name applied to cols with funs +#' @keywords internal invoke_higher_order_function <- function(name, cols, funs) { as_jexpr <- function(x) { if (class(x) == "character") { diff --git a/R/pkg/R/jobj.R b/R/pkg/R/jobj.R index 4905e1fe5c61f..5c2cdddcfa36a 100644 --- a/R/pkg/R/jobj.R +++ b/R/pkg/R/jobj.R @@ -72,6 +72,7 @@ jobj <- function(objId) { #' @param x The JVM object reference #' @param ... further arguments passed to or from other methods #' @note print.jobj since 1.4.0 +#' @keywords internal print.jobj <- function(x, ...) { name <- getClassName.jobj(x) cat("Java ref type", name, "id", x$id, "\n", sep = " ") diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R index 7044ede0cc58b..4c83e2e450cb6 100644 --- a/R/pkg/R/schema.R +++ b/R/pkg/R/schema.R @@ -95,6 +95,7 @@ structType.character <- function(x, ...) { #' @param x A StructType object #' @param ... further arguments passed to or from other methods #' @note print.structType since 1.4.0 +#' @keywords internal print.structType <- function(x, ...) { cat("StructType\n", sapply(x$fields(), @@ -234,6 +235,7 @@ structField.character <- function(x, type, nullable = TRUE, ...) { #' @param x A StructField object #' @param ... further arguments passed to or from other methods #' @note print.structField since 1.4.0 +#' @keywords internal print.structField <- function(x, ...) { cat("StructField(name = \"", x$name(), "\", type = \"", x$dataType.toString(), diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 264cbfc9ba929..ca8f8defdfdec 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -115,6 +115,7 @@ isRDD <- function(name, env) { #' hashCode("1") # 49 #'} #' @note hashCode since 1.4.0 +#' @keywords internal hashCode <- function(key) { if (class(key) == "integer") { as.integer(key[[1]]) diff --git a/R/README.md b/R/pkg/README.md similarity index 100% rename from R/README.md rename to R/pkg/README.md diff --git a/R/pkg/pkgdown/_pkgdown_template.yml b/R/pkg/pkgdown/_pkgdown_template.yml new file mode 100644 index 0000000000000..674606f5b5cdf --- /dev/null +++ b/R/pkg/pkgdown/_pkgdown_template.yml @@ -0,0 +1,311 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +url: https://spark.apache.org/docs/{SPARK_VERSION}/api/R/ + +template: + package: preferably + bootstrap: 5 + params: + toggle: manual + includes: + before_title: | + + + + +authors: + Shivaram Venkataraman : + href: https://github.com/shivaram + Xiangrui Meng: + href: https://github.com/mengxr + Felix Cheung: + href: https://github.com/felixcheung + " The Apache Software Foundation": + href: "https://www.apache.org/" + + +reference: + +- title: "Distributed Data Frame" +- contents: + - SparkDataFrame-class + - GroupedData-class + - agg + - arrange + - approxQuantile + - as.data.frame + - attach,SparkDataFrame-method + - broadcast + - cache + - cacheTable + - checkpoint + - collect + - coltypes + - colnames + - count + - createDataFrame + - createExternalTable + - createOrReplaceTempView + - createTable + - crossJoin + - crosstab + - cube + - describe + - distinct + - dim + - drop + - dropDuplicates + - dropna + - dtypes + - except + - exceptAll + - explain + - filter + - getNumPartitions + - group_by + - head + - hint + - histogram + - insertInto + - intersect + - intersectAll + - isLocal + - isStreaming + - join + - limit + - localCheckpoint + - merge + - mutate + - ncol + - nrow + - orderBy + - persist + - pivot + - printSchema + - randomSplit + - rbind + - rename + - registerTempTable + - repartition + - repartitionByRange + - rollup + - sample + - sampleBy + - saveAsTable + - schema + - select + - selectExpr + - show + - showDF + - str + - storageLevel + - subset + - summary + - take + - tableToDF + - toJSON + - union + - unionAll + - unionByName + - unpersist + - with + - withColumn + +- title: "Data import and export" +- contents: + - read.df + - read.jdbc + - read.json + - read.orc + - read.parquet + - read.text + - write.df + - write.jdbc + - write.json + - write.orc + - write.parquet + - write.text + +- title: "Column functions" +- contents: + - column_aggregate_functions + - column_avro_functions + - column_collection_functions + - column_datetime_diff_functions + - column_math_functions + - column_misc_functions + - column_ml_functions + - column_nonaggregate_functions + - column_string_functions + - column_window_functions + - alias + - asc + - avg + - between + - cast + - column + - coalesce + - corr + - cov + - dropFields + - endsWith + - first + - last + - not + - otherwise + - startsWith + - substr + - timestamp_seconds + - withField + - over + - predict + - partitionBy + - rangeBetween + - rowsBetween + - windowOrderBy + - windowPartitionBy + - WindowSpec-class + - "%in%" + - "%<=>%" + +- title: "Schema Definitions" +- contents: + - structField + - structType + +- title: "Structured Streaming" +- contents: + - StreamingQuery-class + - awaitTermination + - isActive + - queryName + - lastProgress + - read.stream + - status + - stopQuery + - withWatermark + - write.stream + +- title: "Spark MLlib" + desc: "MLlib is Spark’s machine learning (ML) library" +- contents: + - AFTSurvivalRegressionModel-class + - ALSModel-class + - BisectingKMeansModel-class + - DecisionTreeClassificationModel-class + - DecisionTreeRegressionModel-class + - 
FMClassificationModel-class + - FMRegressionModel-class + - FPGrowthModel-class + - GBTClassificationModel-class + - GBTRegressionModel-class + - GaussianMixtureModel-class + - GeneralizedLinearRegressionModel-class + - glm,formula,ANY,SparkDataFrame-method + - IsotonicRegressionModel-class + - KMeansModel-class + - KSTest-class + - LDAModel-class + - LinearRegressionModel-class + - LinearSVCModel-class + - LogisticRegressionModel-class + - MultilayerPerceptronClassificationModel-class + - NaiveBayesModel-class + - PowerIterationClustering-class + - PrefixSpan-class + - RandomForestClassificationModel-class + - RandomForestRegressionModel-class + - fitted + - freqItems + - spark.als + - spark.bisectingKmeans + - spark.decisionTree + - spark.fmClassifier + - spark.fmRegressor + - spark.fpGrowth + - spark.gaussianMixture + - spark.gbt + - spark.glm + - spark.isoreg + - spark.kmeans + - spark.kstest + - spark.lda + - spark.lm + - spark.logit + - spark.mlp + - spark.naiveBayes + - spark.assignClusters + - spark.findFrequentSequentialPatterns + - spark.randomForest + - spark.survreg + - spark.svmLinear + - read.ml + - write.ml + +- title: "Distributed R" +- contents: + - dapply + - dapplyCollect + - gapply + - gapplyCollect + - spark.lapply + +- title: "SQL Catalog" +- contents: + - currentDatabase + - dropTempTable + - dropTempView + - listColumns + - listDatabases + - listFunctions + - listTables + - refreshByPath + - refreshTable + - recoverPartitions + - tableNames + - tables + - uncacheTable + +- title: "Spark Session and Context" +- contents: + - cancelJobGroup + - clearCache + - clearJobGroup + - getLocalProperty + - install.spark + - setCheckpointDir + - setCurrentDatabase + - setJobDescription + - setJobGroup + - setLocalProperty + - setLogLevel + - spark.addFile + - spark.getSparkFiles + - spark.getSparkFilesRootDirectory + - sparkR.conf + - sparkR.callJMethod + - sparkR.callJStatic + - sparkR.init + - sparkR.newJObject + - sparkR.session + - sparkR.session.stop + - sparkR.uiWebUrl + - sparkR.version + - sparkRHive.init + - sparkRSQL.init + - sql diff --git a/R/pkg/pkgdown/extra.css b/R/pkg/pkgdown/extra.css new file mode 100644 index 0000000000000..997789bf25ccb --- /dev/null +++ b/R/pkg/pkgdown/extra.css @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +body, p, a { + font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji; + font-weight: 400; +} + +h1 { + margin-top: 3rem !important; + font-size: 2.25rem !important; +} + +h2 { + font-size: 2rem !important; +} + +aside h2 { + margin-top: 2rem !important; + font-size: 1.25rem !important; +} + +h3 { + font-size: 1.75rem !important; +} + +.navbar-brand { + padding-top: .3125rem; + padding-bottom: .3125rem; + margin-right: 1rem; + font-size: 1.25rem; + text-decoration: none; + white-space: nowrap; +} diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 0ed0028eb5173..1f3dd13353ffe 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -27,7 +27,15 @@ vignette: > limitations under the License. --> -```{r setup, include=FALSE} +```{r dynamic-chunk-options, include=FALSE} +# In GitHub lint job, we don't have full JVM build +# SparkR vignette fails to evaluate +GITHUB_ACTIONS <- tolower(Sys.getenv("GITHUB_ACTIONS")) == "true" +EVAL_CHUNK <- !GITHUB_ACTIONS +``` + + +```{r setup, include=FALSE, eval=EVAL_CHUNK} library(knitr) opts_hooks$set(eval = function(options) { # override eval to FALSE only on windows @@ -53,11 +61,11 @@ SparkR is an R package that provides a light-weight frontend to use Apache Spark We begin with an example running on the local machine and provide an overview of the use of SparkR: data ingestion, data processing and machine learning. First, let's load and attach the package. -```{r, message=FALSE} +```{r, message=FALSE, eval=EVAL_CHUNK} library(SparkR) ``` -```{r, include=FALSE} +```{r, include=FALSE, eval=EVAL_CHUNK} # disable eval if java version not supported override_eval <- tryCatch(!is.numeric(SparkR:::checkJavaVersion()), error = function(e) { TRUE }, @@ -75,11 +83,12 @@ if (override_eval) { We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). -```{r, include=FALSE} +```{r, include=FALSE, eval=EVAL_CHUNK} install.spark() sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE) ``` -```{r, eval=FALSE} + +```{r, eval=EVAL_CHUNK} sparkR.session() ``` @@ -87,18 +96,18 @@ The operations in SparkR are centered around an R class called `SparkDataFrame`. `SparkDataFrame` can be constructed from a wide array of sources such as: structured data files, tables in Hive, external databases, or existing local R data frames. For example, we create a `SparkDataFrame` from a local R data frame, -```{r} +```{r, eval=EVAL_CHUNK} cars <- cbind(model = rownames(mtcars), mtcars) carsDF <- createDataFrame(cars) ``` We can view the first few rows of the `SparkDataFrame` by `head` or `showDF` function. -```{r} +```{r, eval=EVAL_CHUNK} head(carsDF) ``` Common data processing operations such as `filter` and `select` are supported on the `SparkDataFrame`. -```{r} +```{r, eval=EVAL_CHUNK} carsSubDF <- select(carsDF, "model", "mpg", "hp") carsSubDF <- filter(carsSubDF, carsSubDF$hp >= 200) head(carsSubDF) @@ -106,13 +115,13 @@ head(carsSubDF) SparkR can use many common aggregation functions after grouping. -```{r} +```{r, eval=EVAL_CHUNK} carsGPDF <- summarize(groupBy(carsDF, carsDF$gear), count = n(carsDF$gear)) head(carsGPDF) ``` The results `carsDF` and `carsSubDF` are `SparkDataFrame` objects. 
To convert back to R `data.frame`, we can use `collect`. **Caution**: This can cause your interactive environment to run out of memory, though, because `collect()` fetches the entire distributed `DataFrame` to your client, which is acting as a Spark driver. -```{r} +```{r, eval=EVAL_CHUNK} carsGP <- collect(carsGPDF) class(carsGP) ``` @@ -120,13 +129,13 @@ class(carsGP) SparkR supports a number of commonly used machine learning algorithms. Under the hood, SparkR uses MLlib to train the model. Users can call `summary` to print a summary of the fitted model, `predict` to make predictions on new data, and `write.ml`/`read.ml` to save/load fitted models. SparkR supports a subset of R formula operators for model fitting, including ‘~’, ‘.’, ‘:’, ‘+’, and ‘-‘. We use linear regression as an example. -```{r} +```{r, eval=EVAL_CHUNK} model <- spark.glm(carsDF, mpg ~ wt + cyl) ``` The result matches that returned by R `glm` function applied to the corresponding `data.frame` `mtcars` of `carsDF`. In fact, for Generalized Linear Model, we specifically expose `glm` for `SparkDataFrame` as well so that the above is equivalent to `model <- glm(mpg ~ wt + cyl, data = carsDF)`. -```{r} +```{r, eval=EVAL_CHUNK} summary(model) ``` @@ -186,7 +195,7 @@ sparkR.session(spark.sql.warehouse.dir = spark_warehouse_path) SparkR can connect to remote Spark clusters. [Cluster Mode Overview](https://spark.apache.org/docs/latest/cluster-overview.html) is a good introduction to different Spark cluster modes. When connecting SparkR to a remote Spark cluster, make sure that the Spark version and Hadoop version on the machine match the corresponding versions on the cluster. Current SparkR package is compatible with -```{r, echo=FALSE, tidy = TRUE} +```{r, echo=FALSE, tidy = TRUE, eval=EVAL_CHUNK} paste("Spark", packageVersion("SparkR")) ``` It should be used both on the local computer and on the remote cluster. @@ -208,7 +217,7 @@ Yarn cluster mode is not supported in the current version. ### Local Data Frame The simplest way is to convert a local R data frame into a `SparkDataFrame`. Specifically we can use `as.DataFrame` or `createDataFrame` and pass in the local R data frame to create a `SparkDataFrame`. As an example, the following creates a `SparkDataFrame` based using the `faithful` dataset from R. -```{r} +```{r, eval=EVAL_CHUNK} df <- as.DataFrame(faithful) head(df) ``` @@ -231,7 +240,7 @@ The data sources API natively supports JSON formatted input files. Note that the Let's take a look at the first two lines of the raw JSON file used here. -```{r} +```{r, eval=EVAL_CHUNK} filePath <- paste0(sparkR.conf("spark.home"), "/examples/src/main/resources/people.json") readLines(filePath, n = 2L) @@ -239,19 +248,19 @@ readLines(filePath, n = 2L) We use `read.df` to read that into a `SparkDataFrame`. -```{r} +```{r, eval=EVAL_CHUNK} people <- read.df(filePath, "json") count(people) head(people) ``` SparkR automatically infers the schema from the JSON file. -```{r} +```{r, eval=EVAL_CHUNK} printSchema(people) ``` If we want to read multiple JSON files, `read.json` can be used. -```{r} +```{r, eval=EVAL_CHUNK} people <- read.json(paste0(Sys.getenv("SPARK_HOME"), c("/examples/src/main/resources/people.json", "/examples/src/main/resources/people.json"))) @@ -298,12 +307,12 @@ dplyr | SparkR Other differences will be mentioned in the specific methods. We use the `SparkDataFrame` `carsDF` created above. We can get basic information about the `SparkDataFrame`. 
-```{r} +```{r, eval=EVAL_CHUNK} carsDF ``` Print out the schema in tree format. -```{r} +```{r, eval=EVAL_CHUNK} printSchema(carsDF) ``` @@ -314,12 +323,12 @@ printSchema(carsDF) SparkDataFrames support a number of functions to do structured data processing. Here we include some basic examples and a complete list can be found in the [API](https://spark.apache.org/docs/latest/api/R/index.html) docs: You can also pass in column name as strings. -```{r} +```{r, eval=EVAL_CHUNK} head(select(carsDF, "mpg")) ``` Filter the SparkDataFrame to only retain rows with mpg less than 20 miles/gallon. -```{r} +```{r, eval=EVAL_CHUNK} head(filter(carsDF, carsDF$mpg < 20)) ``` @@ -335,20 +344,20 @@ A number of widely used functions are supported to aggregate data after grouping For example we can compute a histogram of the number of cylinders in the `mtcars` dataset as shown below. -```{r} +```{r, eval=EVAL_CHUNK} numCyl <- summarize(groupBy(carsDF, carsDF$cyl), count = n(carsDF$cyl)) head(numCyl) ``` Use `cube` or `rollup` to compute subtotals across multiple dimensions. -```{r} +```{r, eval=EVAL_CHUNK} mean(cube(carsDF, "cyl", "gear", "am"), "mpg") ``` generates groupings for {(`cyl`, `gear`, `am`), (`cyl`, `gear`), (`cyl`), ()}, while -```{r} +```{r, eval=EVAL_CHUNK} mean(rollup(carsDF, "cyl", "gear", "am"), "mpg") ``` @@ -359,7 +368,7 @@ generates groupings for all possible combinations of grouping columns. SparkR also provides a number of functions that can directly applied to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions. -```{r} +```{r, eval=EVAL_CHUNK} carsDF_km <- carsDF carsDF_km$kmpg <- carsDF_km$mpg * 1.61 head(select(carsDF_km, "model", "mpg", "kmpg")) @@ -378,7 +387,7 @@ Formally, the *group* mentioned above is called the *frame*. Every input row can Window functions are often used in conjunction with the following functions: `windowPartitionBy`, `windowOrderBy`, `partitionBy`, `orderBy`, `over`. To illustrate this we next look at an example. We still use the `mtcars` dataset. The corresponding `SparkDataFrame` is `carsDF`. Suppose for each number of cylinders, we want to calculate the rank of each car in `mpg` within the group. -```{r} +```{r, eval=EVAL_CHUNK} carsSubDF <- select(carsDF, "model", "mpg", "cyl") ws <- orderBy(windowPartitionBy("cyl"), "mpg") carsRank <- withColumn(carsSubDF, "rank", over(rank(), ws)) @@ -403,7 +412,7 @@ In SparkR, we support several kinds of user-defined functions (UDFs). We convert `mpg` to `kmpg` (kilometers per gallon). `carsSubDF` is a `SparkDataFrame` with a subset of `carsDF` columns. -```{r} +```{r, eval=EVAL_CHUNK} carsSubDF <- select(carsDF, "model", "mpg") schema <- "model STRING, mpg DOUBLE, kmpg DOUBLE" out <- dapply(carsSubDF, function(x) { x <- cbind(x, x$mpg * 1.61) }, schema) @@ -412,7 +421,7 @@ head(collect(out)) Like `dapply`, `dapplyCollect` can apply a function to each partition of a `SparkDataFrame` and collect the result back. The output of the function should be a `data.frame`, but no schema is required in this case. Note that `dapplyCollect` can fail if the output of the UDF on all partitions cannot be pulled into the driver's memory. -```{r} +```{r, eval=EVAL_CHUNK} out <- dapplyCollect( carsSubDF, function(x) { @@ -424,7 +433,7 @@ head(out, 3) #### Apply by Group `gapply` can apply a function to each group of a `SparkDataFrame`. 
The function is to be applied to each group of the `SparkDataFrame` and should have only two parameters: grouping key and R `data.frame` corresponding to that key. The groups are chosen from `SparkDataFrames` column(s). The output of function should be a `data.frame`. Schema specifies the row format of the resulting `SparkDataFrame`. It must represent R function’s output schema on the basis of Spark data types. The column names of the returned `data.frame` are set by user. See [here](#DataTypes) for mapping between R and Spark. -```{r} +```{r, eval=EVAL_CHUNK} schema <- structType(structField("cyl", "double"), structField("max_mpg", "double")) result <- gapply( carsDF, @@ -438,7 +447,7 @@ head(arrange(result, "max_mpg", decreasing = TRUE)) Like `gapply`, `gapplyCollect` can apply a function to each partition of a `SparkDataFrame` and collect the result back to R `data.frame`. The output of the function should be a `data.frame` but no schema is required in this case. Note that `gapplyCollect` can fail if the output of the UDF on all partitions cannot be pulled into the driver's memory. -```{r} +```{r, eval=EVAL_CHUNK} result <- gapplyCollect( carsDF, "cyl", @@ -456,7 +465,7 @@ Similar to `lapply` in native R, `spark.lapply` runs a function over a list of e We use `svm` in package `e1071` as an example. We use all default settings except for varying costs of constraints violation. `spark.lapply` can train those different models in parallel. -```{r} +```{r, eval=EVAL_CHUNK} costs <- exp(seq(from = log(1), to = log(1000), length.out = 5)) train <- function(cost) { stopifnot(requireNamespace("e1071", quietly = TRUE)) @@ -466,24 +475,24 @@ train <- function(cost) { ``` Return a list of model's summaries. -```{r} +```{r, eval=EVAL_CHUNK} model.summaries <- spark.lapply(costs, train) ``` -```{r} +```{r, eval=EVAL_CHUNK} class(model.summaries) ``` To avoid lengthy display, we only present the partial result of the second fitted model. You are free to inspect other models as well. -```{r, include=FALSE} +```{r, include=FALSE, eval=EVAL_CHUNK} ops <- options() options(max.print=40) ``` -```{r} +```{r, eval=EVAL_CHUNK} print(model.summaries[[2]]) ``` -```{r, include=FALSE} +```{r, include=FALSE, eval=EVAL_CHUNK} options(ops) ``` @@ -491,19 +500,19 @@ options(ops) ### SQL Queries A `SparkDataFrame` can also be registered as a temporary view in Spark SQL so that one can run SQL queries over its data. The sql function enables applications to run SQL queries programmatically and returns the result as a `SparkDataFrame`. -```{r} +```{r, eval=EVAL_CHUNK} people <- read.df(paste0(sparkR.conf("spark.home"), "/examples/src/main/resources/people.json"), "json") ``` Register this `SparkDataFrame` as a temporary view. -```{r} +```{r, eval=EVAL_CHUNK} createOrReplaceTempView(people, "people") ``` SQL statements can be run using the sql method. -```{r} +```{r, eval=EVAL_CHUNK} teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") head(teenagers) ``` @@ -577,18 +586,18 @@ For most above, SparkR supports **R formula operators**, including `~`, `.`, `:` ### Training and Test Sets We can easily split `SparkDataFrame` into random training and test sets by the `randomSplit` function. It returns a list of split `SparkDataFrames` with provided `weights`. We use `carsDF` as an example and want to have about $70%$ training data and $30%$ test data. 
-```{r} +```{r, eval=EVAL_CHUNK} splitDF_list <- randomSplit(carsDF, c(0.7, 0.3), seed = 0) carsDF_train <- splitDF_list[[1]] carsDF_test <- splitDF_list[[2]] ``` -```{r} +```{r, eval=EVAL_CHUNK} count(carsDF_train) head(carsDF_train) ``` -```{r} +```{r, eval=EVAL_CHUNK} count(carsDF_test) head(carsDF_test) ``` @@ -601,7 +610,7 @@ head(carsDF_test) This is a binary classifier. We use a simple example to show how to use `spark.svmLinear` for binary classification. -```{r} +```{r, eval=EVAL_CHUNK} # load training data and create a DataFrame t <- as.data.frame(Titanic) training <- createDataFrame(t) @@ -611,7 +620,7 @@ summary(model) ``` Predict values on training data -```{r} +```{r, eval=EVAL_CHUNK} prediction <- predict(model, training) head(select(prediction, "Class", "Sex", "Age", "Freq", "Survived", "prediction")) ``` @@ -627,7 +636,7 @@ We use a simple example to demonstrate `spark.logit` usage. In general, there ar and 3). Obtain the coefficient matrix of the fitted model using `summary` and use the model for prediction with `predict`. Binomial logistic regression -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) training <- createDataFrame(t) model <- spark.logit(training, Survived ~ ., regParam = 0.04741301) @@ -635,13 +644,13 @@ summary(model) ``` Predict values on training data -```{r} +```{r, eval=EVAL_CHUNK} fitted <- predict(model, training) head(select(fitted, "Class", "Sex", "Age", "Freq", "Survived", "prediction")) ``` Multinomial logistic regression against three classes -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) training <- createDataFrame(t) # Note in this case, Spark infers it is multinomial logistic regression, so family = "multinomial" is optional. @@ -673,7 +682,7 @@ MLPC employs backpropagation for learning the model. We use the logistic loss fu `spark.mlp` requires at least two columns in `data`: one named `"label"` and the other one `"features"`. The `"features"` column should be in libSVM-format. We use Titanic data set to show how to use `spark.mlp` in classification. -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) training <- createDataFrame(t) # fit a Multilayer Perceptron Classification Model @@ -681,18 +690,18 @@ model <- spark.mlp(training, Survived ~ Age + Sex, blockSize = 128, layers = c(2 ``` To avoid lengthy display, we only present partial results of the model summary. You can check the full result from your sparkR shell. -```{r, include=FALSE} +```{r, include=FALSE, eval=EVAL_CHUNK} ops <- options() options(max.print=5) ``` -```{r} +```{r, eval=EVAL_CHUNK} # check the summary of the fitted model summary(model) ``` -```{r, include=FALSE} +```{r, include=FALSE, eval=EVAL_CHUNK} options(ops) ``` -```{r} +```{r, eval=EVAL_CHUNK} # make predictions use the fitted model predictions <- predict(model, training) head(select(predictions, predictions$prediction)) @@ -702,7 +711,7 @@ head(select(predictions, predictions$prediction)) Naive Bayes model assumes independence among the features. `spark.naiveBayes` fits a [Bernoulli naive Bayes model](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Bernoulli_naive_Bayes) against a SparkDataFrame. The data should be all categorical. These models are often used for document classification. -```{r} +```{r, eval=EVAL_CHUNK} titanic <- as.data.frame(Titanic) titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5]) naiveBayesModel <- spark.naiveBayes(titanicDF, Survived ~ Class + Sex + Age) @@ -718,7 +727,7 @@ Factorization Machines for classification problems. 
For background and details about the implementation of factorization machines, refer to the [Factorization Machines section](https://spark.apache.org/docs/latest/ml-classification-regression.html#factorization-machines). -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) training <- createDataFrame(t) @@ -735,7 +744,7 @@ Survival analysis studies the expected duration of time until an event happens, Accelerated Failure Time (AFT) model is a parametric survival model for censored data that assumes the effect of a covariate is to accelerate or decelerate the life course of an event by some constant. For more information, refer to the Wikipedia page [AFT Model](https://en.wikipedia.org/wiki/Accelerated_failure_time_model) and the references there. Different from a [Proportional Hazards Model](https://en.wikipedia.org/wiki/Proportional_hazards_model) designed for the same purpose, the AFT model is easier to parallelize because each instance contributes to the objective function independently. -```{r, warning=FALSE} +```{r, warning=FALSE, eval=EVAL_CHUNK} library(survival) ovarianDF <- createDataFrame(ovarian) aftModel <- spark.survreg(ovarianDF, Surv(futime, fustat) ~ ecog_ps + rx) @@ -772,23 +781,23 @@ For more information regarding the families and their link functions, see the Wi We use the `mtcars` dataset as an illustration. The corresponding `SparkDataFrame` is `carsDF`. After fitting the model, we print out a summary and see the fitted values by making predictions on the original dataset. We can also pass into a new `SparkDataFrame` of same schema to predict on new data. -```{r} +```{r, eval=EVAL_CHUNK} gaussianGLM <- spark.glm(carsDF, mpg ~ wt + hp) summary(gaussianGLM) ``` When doing prediction, a new column called `prediction` will be appended. Let's look at only a subset of columns here. -```{r} +```{r, eval=EVAL_CHUNK} gaussianFitted <- predict(gaussianGLM, carsDF) head(select(gaussianFitted, "model", "prediction", "mpg", "wt", "hp")) ``` The following is the same fit using the tweedie family: -```{r} +```{r, eval=EVAL_CHUNK} tweedieGLM1 <- spark.glm(carsDF, mpg ~ wt + hp, family = "tweedie", var.power = 0.0) summary(tweedieGLM1) ``` We can try other distributions in the tweedie family, for example, a compound Poisson distribution with a log link: -```{r} +```{r, eval=EVAL_CHUNK} tweedieGLM2 <- spark.glm(carsDF, mpg ~ wt + hp, family = "tweedie", var.power = 1.2, link.power = 0.0) summary(tweedieGLM2) @@ -811,7 +820,7 @@ There are a few more arguments that may be useful. We use an artificial example to show the use. -```{r} +```{r, eval=EVAL_CHUNK} y <- c(3.0, 6.0, 8.0, 5.0, 7.0) x <- c(1.0, 2.0, 3.5, 3.0, 4.0) w <- rep(1.0, 5) @@ -832,7 +841,7 @@ In the prediction stage, based on the fitted monotone piecewise function, the ru For example, when the input is $3.2$, the two closest feature values are $3.0$ and $3.5$, then predicted value would be a linear interpolation between the predicted values at $3.0$ and $3.5$. -```{r} +```{r, eval=EVAL_CHUNK} newDF <- createDataFrame(data.frame(x = c(1.5, 3.2))) head(predict(isoregModel, newDF)) ``` @@ -841,7 +850,7 @@ head(predict(isoregModel, newDF)) Linear regression model. -```{r} +```{r, eval=EVAL_CHUNK} model <- spark.lm(carsDF, mpg ~ wt + hp) summary(model) @@ -856,7 +865,7 @@ Factorization Machines for regression problems. 
For background and details about the implementation of factorization machines, refer to the [Factorization Machines section](https://spark.apache.org/docs/latest/ml-classification-regression.html#factorization-machines). -```{r} +```{r, eval=EVAL_CHUNK} model <- spark.fmRegressor(carsDF, mpg ~ wt + hp) summary(model) predictions <- predict(model, carsDF) @@ -870,7 +879,7 @@ Users can call `summary` to get a summary of the fitted model, `predict` to make We use the `Titanic` dataset to train a decision tree and make predictions: -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) df <- createDataFrame(t) dtModel <- spark.decisionTree(df, Survived ~ ., type = "classification", maxDepth = 2) @@ -886,7 +895,7 @@ Users can call `summary` to get a summary of the fitted model, `predict` to make We use the `Titanic` dataset to train a gradient-boosted tree and make predictions: -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) df <- createDataFrame(t) gbtModel <- spark.gbt(df, Survived ~ ., type = "classification", maxDepth = 2, maxIter = 2) @@ -902,7 +911,7 @@ Users can call `summary` to get a summary of the fitted model, `predict` to make In the following example, we use the `Titanic` dataset to train a random forest and make predictions: -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) df <- createDataFrame(t) rfModel <- spark.randomForest(df, Survived ~ ., type = "classification", maxDepth = 2, numTrees = 2) @@ -915,7 +924,7 @@ head(select(predictions, "Class", "Sex", "Age", "Freq", "Survived", "prediction" `spark.bisectingKmeans` is a kind of [hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) using a divisive (or "top-down") approach: all observations start in one cluster, and splits are performed recursively as one moves down the hierarchy. -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) training <- createDataFrame(t) model <- spark.bisectingKmeans(training, Class ~ Survived, k = 4) @@ -929,7 +938,7 @@ head(select(fitted, "Class", "prediction")) `spark.gaussianMixture` fits multivariate [Gaussian Mixture Model](https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model) (GMM) against a `SparkDataFrame`. [Expectation-Maximization](https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) (EM) is used to approximate the maximum likelihood estimator (MLE) of the model. We use a simulated example to demonstrate the usage. -```{r} +```{r, eval=EVAL_CHUNK} X1 <- data.frame(V1 = rnorm(4), V2 = rnorm(4)) X2 <- data.frame(V1 = rnorm(6, 3), V2 = rnorm(6, 4)) data <- rbind(X1, X2) @@ -944,7 +953,7 @@ head(select(gmmFitted, "V1", "V2", "prediction")) `spark.kmeans` fits a $k$-means clustering model against a `SparkDataFrame`. As an unsupervised learning method, we don't need a response variable. Hence, the left hand side of the R formula should be left blank. The clustering is based only on the variables on the right hand side. -```{r} +```{r, eval=EVAL_CHUNK} kmeansModel <- spark.kmeans(carsDF, ~ mpg + hp + wt, k = 3) summary(kmeansModel) kmeansPredictions <- predict(kmeansModel, carsDF) @@ -976,7 +985,7 @@ Two more functions are provided for the fitted model. For more information, see the help document `?spark.lda`. Let's look an artificial example. 
-```{r} +```{r, eval=EVAL_CHUNK} corpus <- data.frame(features = c( "1 2 6 0 2 3 1 1 0 0 3", "1 3 0 1 3 0 0 2 0 0 1", @@ -995,12 +1004,12 @@ model <- spark.lda(data = corpusDF, k = 5, optimizer = "em") summary(model) ``` -```{r} +```{r, eval=EVAL_CHUNK} posterior <- spark.posterior(model, corpusDF) head(posterior) ``` -```{r} +```{r, eval=EVAL_CHUNK} perplexity <- spark.perplexity(model, corpusDF) perplexity ``` @@ -1038,7 +1047,7 @@ head(predicted) Power Iteration Clustering (PIC) is a scalable graph clustering algorithm. `spark.assignClusters` method runs the PIC algorithm and returns a cluster assignment for each input vertex. -```{r} +```{r, eval=EVAL_CHUNK} df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0), list(1L, 2L, 1.0), list(3L, 4L, 1.0), list(4L, 0L, 0.1)), @@ -1050,7 +1059,7 @@ head(spark.assignClusters(df, initMode = "degree", weightCol = "weight")) `spark.fpGrowth` executes FP-growth algorithm to mine frequent itemsets on a `SparkDataFrame`. `itemsCol` should be an array of values. -```{r} +```{r, eval=EVAL_CHUNK} df <- selectExpr(createDataFrame(data.frame(rawItems = c( "T,R,U", "T,S", "V,R", "R,U,T,V", "R,S", "V,S,U", "U,R", "S,T", "V,R", "V,U,S", "T,V,U", "R,V", "T,S", "T,S", "S,T", "S,U", "T,R", "V,R", "S,V", "T,S,U" @@ -1061,19 +1070,19 @@ fpm <- spark.fpGrowth(df, minSupport = 0.2, minConfidence = 0.5) `spark.freqItemsets` method can be used to retrieve a `SparkDataFrame` with the frequent itemsets. -```{r} +```{r, eval=EVAL_CHUNK} head(spark.freqItemsets(fpm)) ``` `spark.associationRules` returns a `SparkDataFrame` with the association rules. -```{r} +```{r, eval=EVAL_CHUNK} head(spark.associationRules(fpm)) ``` We can make predictions based on the `antecedent`. -```{r} +```{r, eval=EVAL_CHUNK} head(predict(fpm, df)) ``` @@ -1081,7 +1090,7 @@ head(predict(fpm, df)) `spark.findFrequentSequentialPatterns` method can be used to find the complete set of frequent sequential patterns in the input sequences of itemsets. -```{r} +```{r, eval=EVAL_CHUNK} df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), list(list(list(1L), list(3L, 2L), list(1L, 2L))), list(list(list(1L, 2L), list(5L))), @@ -1101,7 +1110,7 @@ In the following example, we test whether the `Titanic` dataset's `Freq` column follows a normal distribution. We set the parameters of the normal distribution using the mean and standard deviation of the sample. -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) df <- createDataFrame(t) freqStats <- head(select(df, mean(df$Freq), sd(df$Freq))) @@ -1116,7 +1125,7 @@ testSummary ### Model Persistence The following example shows how to save/load an ML model in SparkR. -```{r} +```{r, eval=EVAL_CHUNK} t <- as.data.frame(Titanic) training <- createDataFrame(t) gaussianGLM <- spark.glm(training, Freq ~ Sex + Age, family = "gaussian") @@ -1276,10 +1285,10 @@ env | map * [SparkR: Scaling R Programs with Spark](https://people.csail.mit.edu/matei/papers/2016/sigmod_sparkr.pdf), Shivaram Venkataraman, Zongheng Yang, Davies Liu, Eric Liang, Hossein Falaki, Xiangrui Meng, Reynold Xin, Ali Ghodsi, Michael Franklin, Ion Stoica, and Matei Zaharia. SIGMOD 2016. June 2016. 
-```{r, echo=FALSE} +```{r, echo=FALSE, eval=EVAL_CHUNK} sparkR.session.stop() ``` -```{r cleanup, include=FALSE} +```{r cleanup, include=FALSE, eval=EVAL_CHUNK} SparkR:::uninstallDownloadedSpark() ``` diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 83752bd941d25..ffd60c07af0c4 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -80,8 +80,11 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ $APT_INSTALL r-base r-base-dev && \ $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ + $APT_INSTALL libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev && \ Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ Rscript -e "devtools::install_github('jimhester/lintr')" && \ + Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \ + Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" && \ # Install tools needed to build the documentation. $APT_INSTALL ruby2.7 ruby2.7-dev && \ gem install --no-document $GEM_PKGS diff --git a/docs/README.md b/docs/README.md index d5d04c6d35a9a..5e9a187ea3ab6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -56,6 +56,8 @@ and install these libraries: ```sh $ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="https://cloud.r-project.org/")' $ sudo Rscript -e 'devtools::install_version("roxygen2", version = "7.1.1", repos="https://cloud.r-project.org/")' +$ sudo Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" +$ sudo Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" ``` Note: Other versions of roxygen2 might work in SparkR documentation generation but `RoxygenNote` field in `$SPARK_HOME/R/pkg/DESCRIPTION` is 7.1.1, which is updated if the version is mismatched. diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index 35d5dc97c2506..28d5e0d82c93a 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -152,11 +152,8 @@ puts "Making directory api/R" mkdir_p "api/R" - puts "cp -r ../R/pkg/html/. api/R" - cp_r("../R/pkg/html/.", "api/R") - - puts "cp ../R/pkg/DESCRIPTION api" - cp("../R/pkg/DESCRIPTION", "api") + puts "cp -r ../R/pkg/docs/. api/R" + cp_r("../R/pkg/docs/.", "api/R") end if not (ENV['SKIP_SQLDOC'] == '1')
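
Taken together, the changes above wire pkgdown into the SparkR documentation build: CI and the release Dockerfile gain the extra system libraries plus pinned `pkgdown` 2.0.1 and `preferably` 0.4, `R/create-docs.sh` renders `_pkgdown.yml` from the new template and builds the site into `R/pkg/docs`, and `docs/_plugins/copy_api_dirs.rb` copies that site into `api/R` instead of the old `R/pkg/html` output. As a rough local sketch of the new flow (not part of the patch itself; it assumes SparkR has already been installed into `$SPARK_HOME/R/lib`, e.g. via `R/install-dev.sh`, and that the pinned pkgdown/preferably versions are available):

```sh
# Sketch of the pkgdown steps that R/create-docs.sh now performs,
# run from $SPARK_HOME/R/pkg with SparkR already built into ../lib.
cd "$SPARK_HOME/R/pkg"

# Render the pkgdown config from the template, filling in the SparkR version.
SPARK_VERSION=$(grep -oP "(?<=Version:\ ).*" DESCRIPTION)
sed "s/{SPARK_VERSION}/$SPARK_VERSION/" pkgdown/_pkgdown_template.yml > _pkgdown.yml

# Build the site into R/pkg/docs, which copy_api_dirs.rb later copies to docs/api/R.
Rscript -e 'library(SparkR, lib.loc = "../lib"); pkgdown::build_site(".")'

# Clean up the temporary config, as the script does.
rm _pkgdown.yml
```

`R/create-docs.sh` still generates the Rd-based HTML and the knitted vignette first; the pkgdown site is an additional output, and it is what the Jekyll build now publishes under `api/R`.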