Skip to content

Commit

Permalink
Script to generate synthetic population. Use WY as a test.
Browse files Browse the repository at this point in the history
  • Loading branch information
confunguido committed Dec 6, 2024
1 parent b03a0db commit bcfd47e
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -382,3 +382,6 @@ docs/site/

# lock file should be personalized
Cargo.lock

# Emacs
*~
2 changes: 1 addition & 1 deletion input/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"infection_duration": 5.0,
"generation_interval": 5.0,
"report_period": 1.0,
"synth_population_file": "input/people_test.csv",
"synth_population_file": "input/synth_pop_people_WY.csv",
"population_periodic_report": "people_report.csv"
}
}
95 changes: 95 additions & 0 deletions scripts/create_synthetic_population.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
## =================================#
## Setup ---------------
## =================================#
library(tidyverse)
library(tigris)
library(tidycensus)
library(patchwork)

set.seed(1234)

state_synth <- "WY"
year_synth <- 2023
population_size <- 1000

## =================================#
## Get population ---------------
## =================================#
pums_vars <- pums_variables |>
filter(year == 2018, survey == "acs1") |>
distinct(var_code, var_label, data_type, level)

person_variables <- c("SPORDER", "SERIALNO", "PWGTP", "AGEP", "SEX", "PUMA", "REGION")

Check warning on line 22 in scripts/create_synthetic_population.R

View workflow job for this annotation

GitHub Actions / pre-commit

file=/home/runner/work/ixa-epi-isolation/ixa-epi-isolation/scripts/create_synthetic_population.R,line=22,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 86 characters.
house_variables <- c("WGTP", "NP")

sample_pums <- get_pums(
variables = c(person_variables, house_variables),
state = state_synth,
survey = "acs1",
year = year_synth
)

household_pums <- sample_pums |>
dplyr::select(SERIALNO, all_of(house_variables)) |>
distinct()

## =================================#
## Create population ---------------
## =================================#
synth_pop_df <- tibble()
house_counter <- 0
while (nrow(synth_pop_df) < population_size) {
house_counter <- house_counter + 1
synth_pop_df <- synth_pop_df |>
bind_rows(household_pums |>
sample_n(1, weight = WGTP) |>

Check warning on line 45 in scripts/create_synthetic_population.R

View workflow job for this annotation

GitHub Actions / pre-commit

file=/home/runner/work/ixa-epi-isolation/ixa-epi-isolation/scripts/create_synthetic_population.R,line=45,col=6,[indentation_linter] Indentation should be 16 spaces but is 6 spaces.
left_join(sample_pums, by = (c("SERIALNO", "WGTP", "NP"))) |>
mutate(house_number = house_counter))
}

## =================================#
## Recode and math GEO -----------
## =================================#
## For now, we will use PUMA codes
## instead of census tracts
pumas_st <- pumas(state = state_synth)
tracts_st <- tracts(state = state_synth)

synth_pop_region_df <- synth_pop_df |>
left_join(
pumas_st |>
dplyr::select(STATEFP20, PUMACE20, INTPTLAT20, INTPTLON20),
by = c("PUMA" = "PUMACE20")
) |>
dplyr::select(-geometry) |>
mutate(
region_id = sprintf("%02d%09d", as.numeric(STATE), as.numeric(PUMA)),
homeId = sprintf("%02d%09d%06d", as.numeric(STATE), as.numeric(PUMA), house_number)

Check warning on line 67 in scripts/create_synthetic_population.R

View workflow job for this annotation

GitHub Actions / pre-commit

file=/home/runner/work/ixa-epi-isolation/ixa-epi-isolation/scripts/create_synthetic_population.R,line=67,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 87 characters.
)


## split pop in persons and regions
## People columns: age, homeId
people_df <- synth_pop_region_df |>
dplyr::select(AGEP, homeId) |>
dplyr::rename(age = AGEP)


## Region columns: region_id, lat, lon
region_df <- synth_pop_region_df |>
dplyr::mutate(lat = as.numeric(INTPTLAT20), lon = as.numeric(INTPTLON20)) |>
dplyr::select(region_id, lat, lon)

write_csv(region_df, file.path("input", sprintf("synth_pop_region_%s.csv", state_synth)))

Check warning on line 83 in scripts/create_synthetic_population.R

View workflow job for this annotation

GitHub Actions / pre-commit

file=/home/runner/work/ixa-epi-isolation/ixa-epi-isolation/scripts/create_synthetic_population.R,line=83,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 89 characters.
write_csv(people_df, file.path("input", sprintf("synth_pop_people_%s.csv", state_synth)))

Check warning on line 84 in scripts/create_synthetic_population.R

View workflow job for this annotation

GitHub Actions / pre-commit

file=/home/runner/work/ixa-epi-isolation/ixa-epi-isolation/scripts/create_synthetic_population.R,line=84,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 89 characters.
## =================================#
## Quick plot -----------
## =================================#
g1 <- ggplot(region_df) +
aes(x = lon, y = lat) +
geom_point()

g2 <- ggplot(pumas_st) +
geom_sf() +
theme_void()
g1 + g2

0 comments on commit bcfd47e

Please sign in to comment.