Auto build README and delete analysis.R (#1)

* GHA now automates rendering of README.Rmd to README.md * Images are now SVG rather than PNG files. They are better quality.
imanuelcostigan · Jan 1, 2022 · 1c9eee3 · 1c9eee3
1 parent 2683bc9
commit 1c9eee3
Show file tree

Hide file tree

Showing 12 changed files with 8,444 additions and 404 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1 @@
+^\.github$
diff --git a/.github/.gitignore b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/render-rmarkdown.yaml b/.github/workflows/render-rmarkdown.yaml
@@ -0,0 +1,31 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    paths: ['**.Rmd']
+
+name: render-rmarkdown
+
+jobs:
+  render-rmarkdown:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - uses: r-lib/actions/setup-pandoc@v2
+      - uses: r-lib/actions/setup-r@v2
+      - uses: r-lib/actions/setup-renv@v2
+      - name: Render Rmarkdown files and commit results
+        # Rendering and committing share one step: every `run` starts a fresh
+        # shell, so RMD_PATH would be unset in a separate commit step.
+        run: |
+          RMD_PATH=($(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '[.]Rmd$'))
+          Rscript -e 'for (f in commandArgs(TRUE)) if (file.exists(f)) rmarkdown::render(f)' ${RMD_PATH[*]}
+          git config --local user.name "$GITHUB_ACTOR"
+          git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
+          git commit ${RMD_PATH[*]/.Rmd/.md} -m 'Re-build Rmarkdown files' || echo "No changes to commit"
+          git push origin || echo "No changes to commit"
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@
 .Rhistory
 .RData
 .Ruserdata
+README.html
diff --git a/README.Rmd b/README.Rmd
@@ -1,7 +1,7 @@
 ---
-title: "COVID benchmark"
+title: "Benchmarking countries' COVID indicators"
 author: "Imanuel Costigan"
-date: "01/01/2022"
+date: "`r Sys.Date()`"
 output:
   md_document:
     variant: gfm
@@ -10,38 +10,143 @@ editor_options:
 ---
 
 ```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE, dev = "ragg_png")
+# Chunk defaults: echo the code, draw figures with the SVG device and write
+# them under fig/.
+knitr::opts_chunk$set(
+  echo = TRUE, 
+  dev = "svg",
+  fig.path = "fig/"
+)
+library(tidyverse)
 ```
 
 # COVID benchmarking
 
 This repository contains R code that I have used to benchmark a country against
-other countries. 
+other countries. I have benchmarked Australia against a selection of OECD 
+countries.
 
-The core code is in the `analysis.R` file. All code and the associated runtime
-environment is encapsulated by `renv` and should be reproducible from this.
+## Setup
 
-All data is sourced from [Our World in Data](https://github.com/owid/covid-19-data/tree/master/public/data). 
+First let's set up some global variables:
 
-Post-processing of this data set includes
+```{r globals}
+# ISO code of the country that is benchmarked against the others.
+country_of_interest <- "AUS"
+# Full comparison set (including the country of interest), kept sorted.
+countries <- sort(c(
+  country_of_interest,
+  "NZL", "USA", "GBR", "CAN", "DEU", "FRA", "AUT", "BEL", "SGP", "DNK",
+  "ISR", "ITA", "JPN", "KOR", "NLD", "FIN", "NOR", "SWE", "ESP", "CHE"
+))
+# Smaller subset used for the time-series plot.
+smaller_set_of_countries <- c(country_of_interest, "CAN", "NZL", "SGP")
+# Observations before this date are dropped.
+start_date <- as.Date("2021-09-01")
+```
+
+## Data sourcing 
+
+All data is sourced from [Our World in Data](https://github.com/owid/covid-19-data/tree/master/public/data).
+
+```{r source}
+# Read the OWID COVID CSV directly from its published URL.
+# show_col_types = FALSE suppresses readr's column specification message.
+url <- "https://covid.ourworldindata.org/data/owid-covid-data.csv"
+owid <- read_csv(url, show_col_types = FALSE)
+```
+
+## Data post-processing
 
-1. Filtering for countries that are part of the OECD (though not all have 
-   been included).
-2. Missing values are filled using the last available value.
+Post-processing of this data set includes:
+
+0. Translating the "wide" format OWID data set into a "long" format.
+1. Filtering for countries that are of interest (in my case, a selection of OECD
+   countries).
+2. Filling missing values with the last available value.
+
+```{r post_processing}
+# Build the long-format indicator table: keep the countries and dates of
+# interest, select the benchmarked indicators, carry the last available value
+# forward, then pivot to one row per country, date and indicator.
+owid_long_tbl <-
+  owid |>
+  filter(
+    iso_code %in% countries,
+    date >= start_date
+  ) |>
+  select(
+    iso_code,
+    location,
+    date,
+    stringency_index,
+    people_fully_vaccinated_per_hundred,
+    total_boosters_per_hundred,
+    new_tests_smoothed_per_thousand,
+    new_cases_smoothed_per_million,
+    # "Tests conducted per new confirmed case of COVID-19, given as a rolling
+    #  7-day average (this is the inverse of positive_rate)"
+    tests_per_case,
+    hosp_patients_per_million, # stock, not flow.
+    icu_patients_per_million,  # stock, not flow.
+    new_deaths_smoothed_per_million
+  ) |>
+  # Fill within each country, in date order. An ungrouped fill() would carry
+  # the previous country's last value into the next country's leading gaps.
+  arrange(iso_code, date) |>
+  group_by(iso_code) |>
+  fill(
+    stringency_index,
+    people_fully_vaccinated_per_hundred,
+    total_boosters_per_hundred,
+    new_tests_smoothed_per_thousand,
+    new_cases_smoothed_per_million,
+    tests_per_case,
+    hosp_patients_per_million,
+    icu_patients_per_million,
+    new_deaths_smoothed_per_million
+  ) |>
+  ungroup() |>
+  pivot_longer(
+    cols = !c("iso_code", "location", "date"),
+    names_to = "indicator",
+    values_to = "value"
+  ) |>
+  mutate(
+    indicator = as_factor(indicator)
+  )
+```
+
+The following compares the country of interest to a selection of the other 
+countries since the `start_date` defined above.
+
+```{r time-series, fig.width = 11, fig.height = 8}
+# Time series of every indicator for the smaller comparison set, one facet per
+# indicator with its own y scale.
+ggplot(
+  filter(owid_long_tbl, iso_code %in% smaller_set_of_countries),
+  aes(x = date, y = value, colour = iso_code)
+) +
+  geom_point(size = 0.75) +
+  facet_wrap(vars(indicator), scales = "free_y") +
+  scale_colour_brewer(type = "qual", palette = "Set1") +
+  labs(
+    title = "COVID stats",
+    subtitle = "Time series of COVID stats for select countries",
+    x = "Date",
+    y = "Value",
+    colour = "Country ISO code",
+    caption = paste(
+      "Data sourced from Our World in Data.",
+      "Prepared by @imanuelcostigan."
+    )
+  ) +
+  # The complete theme goes first so the legend tweak is not overwritten.
+  theme_light() +
+  theme(legend.position = "bottom")
+```
 
-The most recent boxplot benchmarking Australia to most other OECD countries is 
-presented below:
+And finally, we compare the country of interest to the other countries. We do
+this on the penultimate date that is published by OWID to ensure data is 
+available across all countries.
 
-```{r bench}
-library(fs)
-library(stringr)
-most_recent_date <- 
-  fs::dir_ls("fig/") |>
-  fs::path_file() |>
-  str_extract("[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}") |> 
-  as.Date() |>
-  max()
-fs::path("fig", paste0("bench_", most_recent_date), ext = "png") |>
-  knitr::include_graphics()
+```{r boxplot, fig.width = 10, fig.height = 10}
+# Benchmark on the day before the latest date present in the table.
+benchmark_date <- max(pull(owid_long_tbl, date), na.rm = TRUE) - 1
+# Snapshot of all countries on the benchmark date, and the subset of rows for
+# the country of interest that is overlaid in red.
+owid_last_snap <- filter(owid_long_tbl, date == benchmark_date)
+owid_country_of_interest <-
+  filter(owid_last_snap, iso_code == country_of_interest)
+ggplot(owid_last_snap, aes(x = indicator, y = value)) +
+  geom_boxplot() +
+  geom_boxplot(data = owid_country_of_interest, colour = "red") +
+  facet_wrap(vars(indicator), scales = "free") +
+  labs(
+    title = "COVID benchmarking",
+    subtitle = paste0(
+      "Comparing ", country_of_interest, " (red) to ",
+      paste0(countries[countries != country_of_interest], collapse = ", ")
+    ),
+    caption = paste0(
+      "Data sourced from Our World in Data. Benchmarked for ",
+      benchmark_date, ". Prepared by @imanuelcostigan."
+    )
+  ) +
+  # The complete theme goes first so the axis tweak is not overwritten.
+  theme_bw() +
+  theme(axis.text.x = element_blank())
 ```
 
diff --git a/README.html b/README.html
diff --git a/README.md b/README.md
@@ -1,35 +1,138 @@
 # COVID benchmarking
 
 This repository contains R code that I have used to benchmark a country
-against other countries.
+against other countries. I have benchmarked Australia against a selection
+of OECD countries.
 
-The core code is in the `analysis.R` file. All code and the associated
-runtime environment is encapsulated by `renv` and should be reproducible
-from this.
+## Setup
+
+First let’s set up some global variables:
+
+``` r
+country_of_interest <- "AUS"
+countries <- c(country_of_interest, "NZL", "USA", "GBR", "CAN", "DEU", "FRA", 
+  "AUT", "BEL", "SGP", "DNK", "ISR", "ITA", "JPN", "KOR", "NLD", "FIN", "NOR", 
+  "SWE", "ESP", "CHE")
+countries <- countries[order(countries)]
+smaller_set_of_countries <- c(country_of_interest, "CAN", "NZL", "SGP")
+start_date <- as.Date("2021-09-01")
+```
+
+## Data sourcing
 
 All data is sourced from [Our World in
 Data](https://github.com/owid/covid-19-data/tree/master/public/data).
 
-Post-processing of this data set includes
+``` r
+url <- "https://covid.ourworldindata.org/data/owid-covid-data.csv"
+owid <- read_csv(url, show_col_types = FALSE)
+```
+
+## Data post-processing
+
+Post-processing of this data set includes:
+
+0.  Translating the “wide” format OWID data set into a “long” format.
+1.  Filtering for countries that are of interest (in my case, a
+    selection of OECD countries).
+2.  Filling missing values with the last available value.
+
+``` r
+owid_long_tbl <-
+  owid |>
+  filter(
+    iso_code %in% countries,
+    date >= start_date
+  ) |>
+  select(
+    iso_code,
+    location,
+    date,
+    stringency_index,
+    people_fully_vaccinated_per_hundred,
+    total_boosters_per_hundred,
+    new_tests_smoothed_per_thousand,
+    new_cases_smoothed_per_million,
+    # "Tests conducted per new confirmed case of COVID-19, given as a rolling
+    #  7-day average (this is the inverse of positive_rate)"
+    tests_per_case,
+    hosp_patients_per_million, # stock, not flow.
+    icu_patients_per_million,  # stock, not flow.
+    new_deaths_smoothed_per_million
+  ) |>
+  fill(
+    stringency_index,
+    people_fully_vaccinated_per_hundred,
+    total_boosters_per_hundred,
+    new_tests_smoothed_per_thousand,
+    new_cases_smoothed_per_million,
+    tests_per_case,
+    hosp_patients_per_million,
+    icu_patients_per_million,
+    new_deaths_smoothed_per_million
+  ) |>
+  pivot_longer(
+    cols = !c("iso_code", "location", "date"),
+    names_to = "indicator",
+    values_to = "value"
+  ) |>
+  mutate(
+    indicator = as_factor(indicator)
+  )
+```
+
+The following compares the country of interest to a selection of the
+other countries since the `start_date` defined above.
+
+``` r
+owid_long_tbl |> 
+  filter(iso_code %in% smaller_set_of_countries) |>
+  ggplot(aes(x = date, y = value, colour = iso_code)) +
+  geom_point(size = 0.75) +
+  scale_colour_brewer(type = "qual", palette = "Set1") +
+  facet_wrap(vars(indicator), scales = "free_y") +
+  theme_light() +
+  theme(legend.position = "bottom") +
+  labs(
+    title = "COVID stats",
+    subtitle = "Time series of COVID stats for select countries",
+    x = "Date",
+    y = "Value",
+    colour = "Country ISO code",
+    caption = paste("Data sourced from Our World in Data.", 
+      "Prepared by @imanuelcostigan.")
+  )
+```
+
+    ## Warning: Removed 36 rows containing missing values (geom_point).
 
-1.  Filtering for countries that are part of the OECD (though not all
-    have been included).
-2.  Missing values are filled using the last available value.
+![](fig/time-series-1.svg)<!-- -->
 
-The most recent boxplot benchmarking Australia to most other OECD
-countries is presented below:
+And finally, we compare the country of interest to the other countries.
+We do this on the penultimate date that is published by OWID to ensure
+data is available across all countries.
 
 ``` r
-library(fs)
-library(stringr)
-most_recent_date <- 
-  fs::dir_ls("fig/") |>
-  fs::path_file() |>
-  str_extract("[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}") |> 
-  as.Date() |>
-  max()
-fs::path("fig", paste0("bench_", most_recent_date), ext = "png") |>
-  knitr::include_graphics()
+benchmark_date <- (owid_long_tbl |> pull(date) |> max(na.rm = TRUE)) - 1
+owid_last_snap <-
+  owid_long_tbl |>
+  filter(date == benchmark_date)
+owid_country_of_interest <- 
+  owid_last_snap |>
+  filter(iso_code == country_of_interest)
+owid_last_snap |>
+  ggplot(aes(x = indicator, y = value)) + geom_boxplot()  +
+  geom_boxplot(data = owid_country_of_interest, colour = "red") +
+  facet_wrap(vars(indicator), scales = "free") +
+  theme_bw() +
+  theme(axis.text.x = element_blank()) +
+  labs(
+    title = "COVID benchmarking",
+    subtitle = paste0("Comparing ", country_of_interest, " (red) to ",
+      paste0(countries[countries != country_of_interest], collapse = ", ")),
+    caption = paste0("Data sourced from Our World in Data. Benchmarked for ",
+      benchmark_date, ". Prepared by @imanuelcostigan.")
+  )
 ```
 
-![](fig/bench_2021-12-29.png)<!-- -->
+![](fig/boxplot-1.svg)<!-- -->