Skip to content

Commit

Permalink
Update 04_exercise.R
Browse files Browse the repository at this point in the history
  • Loading branch information
vankesteren committed Apr 15, 2024
1 parent dbf8b83 commit 12cd50b
Showing 1 changed file with 35 additions and 12 deletions.
47 changes: 35 additions & 12 deletions 04_exercise.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,44 @@
# Exercise time!

# The assignment.
# Create a plot with the three different contract types on the x-axis
# and the average wage per unit time (wage per hour) on the y-axis.
# Add 95% confidence intervals (mean +- 1.96 * standard error of the mean).
library(tidyverse)
library(haven)
# Full paths of every file found in the Spolis data folder.
spfns <- list.files(path = "fake_cbs_data/Spolis/", full.names = TRUE)

# Location of the single Spolis SPSS file used in the exercise.
spolis_loc <- "fake_cbs_data/Spolis/SPOLISBUS2022V2.sav"

# The plot below uses the first 100k rows. With more samples we can
# bring the standard error (s.e.) down and see whether there is a
# significant difference between the contract types.
# Read at most the first 100,000 rows of the Spolis file.
df_example <- read_spss(spolis_loc, n_max = 1e5)

# Per contract type, compute the mean wage per hour and a 95% confidence
# interval (mean +- 1.96 * standard error), then draw each contract type as
# a point (the mean) with a vertical range (lower to upper bound).
df_example |>
  # Hours below 1 are treated as 1, which also avoids dividing by zero.
  mutate(hourly_wage = SBASISLOON / pmax(1, SBASISUREN)) |>
  summarise(
    mean = mean(hourly_wage),
    stderr = sd(hourly_wage) / sqrt(n()),
    lower = mean - 1.96 * stderr,
    upper = mean + 1.96 * stderr,
    .by = SCONTRACTSOORT
  ) |>
  ggplot(
    mapping = aes(
      # Show the SPSS value labels rather than the raw codes on the axis.
      x = as_factor(SCONTRACTSOORT, levels = "labels"),
      y = mean,
      ymin = lower,
      ymax = upper
    )
  ) +
  geom_pointrange() +
  labs(
    title = "Average wage per unit time for different contract types.",
    x = "Contract type",
    y = "Average wage"
  ) +
  theme_linedraw()


# Read every file listed in `spfns` and stack the results into one table.
#
# Each file is read into its own list element and the list is combined with
# a single bind_rows() call. Binding inside the loop would re-copy the whole
# accumulated table on every iteration.
if (length(spfns) == 0) {
  # Fail with a clear message instead of an obscure error from read_spss(NA).
  stop("`spfns` is empty: there are no files to read.", call. = FALSE)
}

bigtab <- spfns |>
  lapply(\(fn) {
    # "\r" returns the cursor to the start of the line, so each progress
    # message overwrites the previous one in the console.
    cat("reading", fn, "\r")
    read_spss(fn)
  }) |>
  bind_rows()
# Use your skills to do this for the whole data without loading it all
# in at once! What is your conclusion?

# Save the combined table to disk as an RDS file.
write_rds(bigtab, "bigtab.rds")


# Overwrite three columns with synthetic values.
N <- nrow(bigtab)

# Random 12-digit IDs stored as strings. sprintf("%.0f") rounds to a whole
# number and always prints plain digits. as.character() would switch to
# scientific notation (e.g. "1.5e+11") for values with many trailing zeros,
# which would corrupt the ID.
bigtab$IKVID <- sprintf(
  "%.0f",
  runif(N, min = 100000000000, max = 999999999999)
)

# Every row gets the same fixed pair of dates.
bigtab$SDATUMAANVANGIKO <- as.Date("2013-01-01")
bigtab$SDATUMEINDEIKO <- as.Date("2022-08-23")

# Write the modified table as an SPSS file.
# NOTE(review): this path is inside the folder that `spfns` was listed from,
# so a re-run would also read this combined file -- confirm this is intended.
write_sav(data = bigtab, path = "fake_cbs_data/Spolis/SPOLISBUS2022V2.sav")

0 comments on commit 12cd50b

Please sign in to comment.