From 12cd50bc3f3564b6b43c970147c2e563c6aa30a8 Mon Sep 17 00:00:00 2001
From: Erik-Jan van Kesteren
Date: Mon, 15 Apr 2024 15:39:19 +0200
Subject: [PATCH] Update 04_exercise.R

---
Notes:
  Removed: the code that listed every file under fake_cbs_data/Spolis/,
  read them one by one with read_spss(), stacked them with bind_rows()
  into `bigtab`, wrote bigtab.rds, overwrote the IKVID, SDATUMAANVANGIKO
  and SDATUMEINDEIKO columns, and wrote SPOLISBUS2022V2.sav.

  Added: the exercise text plus a worked example that reads at most 1e5
  rows of SPOLISBUS2022V2.sav, computes per SCONTRACTSOORT the mean of
  SBASISLOON / pmax(1, SBASISUREN) (the divisor is floored at 1) together
  with mean +- 1.96 * sd / sqrt(n), and draws the result with
  geom_pointrange().

  NOTE(review): line breaks, indentation and blank context lines in this
  copy were restored from a whitespace-collapsed version of the patch.
  Blank-line placement was inferred from the hunk header counts
  (-1,21 +1,44) -- verify against the repository before applying.

 04_exercise.R | 47 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/04_exercise.R b/04_exercise.R
index 274b9c2..115fc09 100644
--- a/04_exercise.R
+++ b/04_exercise.R
@@ -1,21 +1,44 @@
 # Exercise time!
+
+# The assignment.
+# Create a plot with on the x-axis the three different contract types,
+# and on the y-axis the average wage per unit time (wage per hour).
+# Add 95% confidence intervals (+- 1.96 * standard error of the mean)
 
 library(tidyverse)
 library(haven)
 
-spfns <- list.files("fake_cbs_data/Spolis/", full.names = TRUE)
+spolis_loc <- "fake_cbs_data/Spolis/SPOLISBUS2022V2.sav"
+
+# the plot for 100k rows below. With more samples
+# we can bring the s.e. down to see if there is a
+# significant difference between these items
+df_example <- read_spss(spolis_loc, n_max = 1e5)
+
+df_example |>
+  summarize(
+    mean = mean(SBASISLOON / pmax(1, SBASISUREN)),
+    stderr = sd(SBASISLOON / pmax(1, SBASISUREN)) / sqrt(n()),
+    lower = mean - 1.96*stderr,
+    upper = mean + 1.96*stderr,
+    .by = SCONTRACTSOORT
+  ) |>
+  ggplot(aes(
+    x = as_factor(SCONTRACTSOORT, levels = "labels"),
+    y = mean,
+    ymax = upper,
+    ymin = lower
+  )) +
+  geom_pointrange() +
+  labs(
+    x = "Contract type",
+    y = "Average wage",
+    title = "Average wage per unit time for different contract types."
+  ) +
+  theme_linedraw()
 
-bigtab <- read_spss(spfns[1])
-for (fn in spfns[-1]) {
-  cat("reading", fn, "\r")
-  bigtab <- bind_rows(bigtab, read_spss(fn))
-}
+# Use your skills to do this for the whole data without loading it all
+# in at once! What is your conclusion?
 
-write_rds(bigtab, "bigtab.rds")
 
-N <- nrow(bigtab)
-bigtab$IKVID <- as.character(round(runif(N, min = 100000000000, max = 999999999999)))
-bigtab$SDATUMAANVANGIKO <- as.Date("2013-01-01")
-bigtab$SDATUMEINDEIKO <- as.Date("2022-08-23")
 
-write_sav(data = bigtab, path = "fake_cbs_data/Spolis/SPOLISBUS2022V2.sav")