-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dbf8b83
commit 12cd50b
Showing
1 changed file
with
35 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,44 @@ | ||
# Exercise time! | ||
|
||
# The assignment. | ||
# Create a plot with the three different contract types on the x-axis | ||
# and the average wage per unit time (wage per hour) on the y-axis. | ||
# Add 95% confidence intervals (+- 1.96 * standard error of the mean) | ||
library(tidyverse) | ||
library(haven) | ||
# Paths of all files in the Spolis data directory (directory prefix included).
spfns <- list.files(path = "fake_cbs_data/Spolis/", full.names = TRUE)
# The single Spolis file that is sampled below.
spolis_loc <- "fake_cbs_data/Spolis/SPOLISBUS2022V2.sav"
|
||
# Read at most 100k rows and plot those below. A larger sample would bring
# the standard error down, which helps to see whether the contract types
# differ significantly.
df_example <- spolis_loc |>
  read_spss(n_max = 1e5)
|
||
# Plot the average wage per hour for each contract type, with a 95%
# confidence interval (mean +- 1.96 * standard error of the mean).
df_example |>
  # Compute the hourly wage once instead of repeating the expression.
  # pmax() keeps the denominator at 1 or above, so there is no division by 0.
  mutate(hourly_wage = SBASISLOON / pmax(1, SBASISUREN)) |>
  summarize(
    # Count and aggregate only non-missing wages, so that a single NA does
    # not turn the whole group's mean and interval into NA.
    n_obs = sum(!is.na(hourly_wage)),
    wage_mean = mean(hourly_wage, na.rm = TRUE),
    wage_se = sd(hourly_wage, na.rm = TRUE) / sqrt(n_obs),
    ci_lower = wage_mean - 1.96 * wage_se,
    ci_upper = wage_mean + 1.96 * wage_se,
    .by = SCONTRACTSOORT
  ) |>
  ggplot(aes(
    # Show the value labels of the contract type instead of the raw codes.
    x = as_factor(SCONTRACTSOORT, levels = "labels"),
    y = wage_mean,
    ymax = ci_upper,
    ymin = ci_lower
  )) +
  geom_pointrange() +
  labs(
    x = "Contract type",
    y = "Average wage",
    title = "Average wage per unit time for different contract types."
  ) +
  theme_linedraw()
|
||
|
||
# Read every file listed in `spfns` and stack the results into one table.
# Fail early with a clear message when the directory listing is empty.
stopifnot("no files found in spfns" = length(spfns) > 0)

# Reading into a list and binding once avoids re-copying the accumulated
# table on every iteration, which growing it inside a loop would do.
bigtab <- spfns |>
  lapply(\(fn) {
    cat("reading", fn, "\r")
    read_spss(fn)
  }) |>
  bind_rows()
# Use your skills to do this for the whole data without loading it all | ||
# in at once! What is your conclusion? | ||
|
||
# Save the combined table as an RDS file.
write_rds(bigtab, "bigtab.rds")

# Overwrite the identifier and the two date columns with synthetic values.
N <- nrow(bigtab)

# Random 12-digit identifiers stored as text. sprintf("%.0f") always prints
# plain digits, whereas as.character() switches to scientific notation for
# round values (as.character(100000000000) is "1e+11").
bigtab$IKVID <- sprintf(
  "%.0f",
  round(runif(N, min = 100000000000, max = 999999999999))
)
bigtab$SDATUMAANVANGIKO <- as.Date("2013-01-01")
bigtab$SDATUMEINDEIKO <- as.Date("2022-08-23")

# Write the modified table back out as an SPSS file.
write_sav(data = bigtab, path = "fake_cbs_data/Spolis/SPOLISBUS2022V2.sav")