-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path03_create_grid.R
71 lines (58 loc) · 2.09 KB
/
03_create_grid.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Introduction to creating a condition grid in R
# last edited 2022-04-04 by @vankesteren
# ODISSEI Social Data Science team
library(tidyverse)
library(sf)
# Read data ----
# Load the pre-processed sf dataset that ships in the data_processed/ folder.
migr_sf <- read_rds(file = "data_processed/migr_sf.rds")

# Plot: where is non-western migration?
# Each region is filled by its `nonwest` value; region borders are drawn
# transparent so that only the fill colour is visible.
migr_sf |>
  ggplot() +
  geom_sf(mapping = aes(fill = nonwest), colour = "transparent") +
  theme_minimal() +
  scale_fill_viridis_c() +
  labs(title = "Nonwest")
# Create a condition grid ----
# Builds one row per combination of region (row of migr_sf), Ba value and
# iteration number, using a tidyverse data processing pipeline.
grid_tbl <-
  as_tibble(migr_sf) |> # first, transform to normal data frame from sf object
  # then, add row numbers as a column. row_number() also handles a zero-row
  # input, where 1:n() would evaluate to c(1, 0) and make mutate() fail.
  mutate(row = row_number()) |>
  select(row, nl, west, nonwest) |> # keep only row number and proportions
  mutate(
    # add iteration counter (50) and parameter grid as list-columns: every
    # region carries the full vectors until they are unnested below
    iter = list(1:50),
    Ba = list(seq(.05, .95, .01))
  ) |>
  unnest_longer(Ba) |> # one row per region x Ba value
  unnest_longer(iter) # one row per region x Ba value x iteration

# write to file
write_rds(grid_tbl, "data_processed/grid_tbl.rds")
# Preview: how to aggregate results from this grid? ----
# put some random numbers / fake results in the condition grid.
result_tbl <- grid_tbl |> mutate(result = rnorm(n()))

# use dplyr language to group and summarize the output
result <-
  result_tbl |>
  group_by(row, Ba) |> # grouping
  # average over the iterations of each (row, Ba) cell; keep grouping by row.
  # .groups is spelled out so the regrouping is explicit instead of relying
  # on summarise()'s default (which also prints a message).
  summarise(output = mean(result, na.rm = TRUE), .groups = "drop_last") |>
  # summarizing further across Ba: reduce each row's per-Ba means to a single
  # value. sample() picks one of them at random here (these are fake results
  # anyway); note that sample(x, 1) treats a length-one numeric x >= 1 as
  # 1:x, so this relies on there being more than one Ba value per row.
  summarise(final = sample(output, 1), .groups = "drop")

# The values are attached to migr_sf by position below, so make sure there is
# exactly one result per region and that they are in row order.
stopifnot(
  nrow(result) == nrow(migr_sf),
  all(result$row == seq_len(nrow(migr_sf)))
)

# then we can add it to our sf dataset and plot the outcome
migr_sf |>
  mutate(result = result$final) |>
  ggplot() +
  geom_sf(aes(fill = result), col = "transparent") +
  scale_fill_viridis_c() +
  theme_minimal() +
  labs(title = "Results")
# How many jobs will we need on the supercomputer? ----
# each node has 16 cores
n_cores <- 16
# each core can estimate about 400 models per minute
mod_rate <- 400
# length of a single job, in minutes (same time unit as mod_rate).
# NOTE(review): this comment used to say "about 30 minutes" while the value
# is 5. The comment was aligned with the code because the job count recorded
# below was presumably computed with 5: with 91 Ba values x 50 iterations per
# region, 462 jobs corresponds to roughly 3,250 regions at 5 minutes versus
# roughly 19,500 regions at 30 minutes. Confirm the intended job length.
job_time <- 5
# get final chunk size: the number of grid rows (models) one job can process,
# i.e. cores per node x models per core per minute x minutes per job
chunk_size <- n_cores * mod_rate * job_time
# number of jobs needed, rounded up so a final partial chunk gets its own job:
ceiling(nrow(grid_tbl) / chunk_size)
# 462!