diff --git a/DESCRIPTION b/DESCRIPTION index 7dae406..4ba6bee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,8 @@ Authors@R: person("Ege Can", "Taşlıçukur", role = "aut"), person("Vedha", "Viyash", role = "aut"), person("David", "Blair", role = "aut"), - person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-0862-2018")) + person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-0862-2018")), + person("Fanny", "Gautier", role = "aut", comment = c(ORCID = "0009-0004-3581-0131")) Description: This is not a package, but we just use this file to declare the dependencies of the site. URL: https://github.com/pharmaverse/examples diff --git a/adam/adsl.R b/adam/adsl.R index ad4c1e9..bc4a529 100644 --- a/adam/adsl.R +++ b/adam/adsl.R @@ -10,40 +10,94 @@ library(lubridate) library(stringr) # Read in input SDTM data -data("dm") -data("ex") +dm <- pharmaversesdtm::dm +ds <- pharmaversesdtm::ds +ex <- pharmaversesdtm::ex +ae <- pharmaversesdtm::ae +vs <- pharmaversesdtm::vs +suppdm <- pharmaversesdtm::suppdm + +# When SAS datasets are imported into R using haven::read_sas(), missing +# character values from SAS appear as "" characters in R, instead of appearing +# as NA values. Further details can be obtained via the following link: +# https://pharmaverse.github.io/admiral/articles/admiral.html#handling-of-missing-values +dm <- convert_blanks_to_na(dm) +ds <- convert_blanks_to_na(ds) +ex <- convert_blanks_to_na(ex) +ae <- convert_blanks_to_na(ae) +vs <- convert_blanks_to_na(vs) +suppdm <- convert_blanks_to_na(suppdm) + +# Combine Parent and Supp - very handy! ---- +dm_suppdm <- combine_supp(dm, suppdm) ## ----r metacore, warning=FALSE, results='hold'-------------------------------- # Read in metacore object -load(metacore_example("pilot_ADaM.rda")) -metacore <- metacore %>% - select_dataset("ADSL") - -## ----r------------------------------------------------------------------------ -metacore$ds_vars +metacore <- spec_to_metacore( + path = "./metadata/safety_specs.xlsx", + where_sep_sheet = FALSE, + quiet = TRUE +) %>% + select_dataset("ADSL") ## ----r demographics----------------------------------------------------------- adsl_preds <- build_from_derived(metacore, - ds_list = list("dm" = dm), - predecessor_only = FALSE, keep = TRUE) -head(adsl_preds, n=10) + ds_list = list("dm" = dm_suppdm, "suppdm" = dm_suppdm), + predecessor_only = FALSE, keep = FALSE) ## ----r------------------------------------------------------------------------ get_control_term(metacore, variable = AGEGR1) -## ----r ct--------------------------------------------------------------------- +## ----r grouping_option_1------------------------------------------------------ adsl_ct <- adsl_preds %>% - create_cat_var(metacore, ref_var = AGE, - grp_var = AGEGR1, num_grp_var = AGEGR1N) %>% - create_var_from_codelist(metacore = metacore, - input_var = RACE, - out_var = RACEN) %>% - # Removing screen failures from ARM and TRT01P to match the define and FDA guidance - mutate(ARM = if_else(ARM == "Screen Failure", NA_character_, ARM), - TRT01P = if_else(TRT01P == "Screen Failure", NA_character_, TRT01P) - ) - -head(adsl_ct, n=10) + create_cat_var(metacore, ref_var = AGE, + grp_var = AGEGR1, num_grp_var = AGEGR1N) + + +## ----r grouping_option_2------------------------------------------------------ +agegr1_lookup <- exprs( + ~condition, ~AGEGR1, ~AGEGR1N, + AGE < 18, "<18", 1, + between(AGE, 18, 64), "18-64", 2, + !is.na(AGE), ">64", 3, + is.na(AGE), "Missing", 4 +) + +adsl_cat <- derive_vars_cat( + dataset = adsl_preds, + definition = agegr1_lookup +) + +## ----r grouping_option_3------------------------------------------------------ +format_agegr1 <- function(var_input) { + case_when( + var_input < 18 ~ "<18", + between(var_input, 18, 64) ~ "18-64", + var_input > 64 ~ ">64", + TRUE ~ "Missing" + ) +} + +format_agegr1n <- function(var_input) { + case_when( + var_input < 18 ~ 1, + between(var_input, 18, 64) ~ 2, + var_input > 64 ~ 3, + TRUE ~ 4 + ) +} + +adsl_cust <- adsl_preds %>% + mutate( + AGEGR1 = format_agegr1(AGE), + AGEGR1N = format_agegr1n(AGE) + ) + +## ----r codelist--------------------------------------------------------------- +adsl_ct <- adsl_ct %>% + create_var_from_codelist(metacore = metacore, + input_var = RACE, + out_var = RACEN) ## ----r exposure--------------------------------------------------------------- ex_ext <- ex %>% @@ -58,82 +112,210 @@ ex_ext <- ex %>% ) adsl_raw <- adsl_ct %>% + # Treatment Start Datetime derive_vars_merged( dataset_add = ex_ext, filter_add = (EXDOSE > 0 | - (EXDOSE == 0 & - str_detect(EXTRT, "PLACEBO"))) & nchar(EXSTDTC) >= 10, - new_vars = exprs(TRTSDTM = EXSTDTM), + (EXDOSE == 0 & + str_detect(EXTRT, "PLACEBO"))) & !is.na(EXSTDTM), + new_vars = exprs(TRTSDTM = EXSTDTM, TRTSTMF = EXSTTMF), order = exprs(EXSTDTM, EXSEQ), mode = "first", by_vars = exprs(STUDYID, USUBJID) ) %>% + # Treatment End Datetime derive_vars_merged( dataset_add = ex_ext, filter_add = (EXDOSE > 0 | - (EXDOSE == 0 & - str_detect(EXTRT, "PLACEBO"))) & nchar(EXENDTC) >= 10, - new_vars = exprs(TRTEDTM = EXENDTM), + (EXDOSE == 0 & + str_detect(EXTRT, "PLACEBO"))) & !is.na(EXENDTM), + new_vars = exprs(TRTEDTM = EXENDTM, TRTETMF = EXENTMF), order = exprs(EXENDTM, EXSEQ), mode = "last", by_vars = exprs(STUDYID, USUBJID) ) %>% - derive_vars_dtm_to_dt(source_vars = exprs(TRTSDTM, TRTEDTM)) %>% # Convert Datetime variables to date - derive_var_trtdurd() %>% - derive_var_merged_exist_flag( - dataset_add = ex, - by_vars = exprs(STUDYID, USUBJID), - new_var = SAFFL, - condition = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) - ) %>% - drop_unspec_vars(metacore) # This will drop any columns that aren't specified in the metacore object - -head(adsl_raw, n=10) - -## ----r, warning=FALSE, message=FALSE, include=FALSE--------------------------- -# Create dummy variables to match metacore specs to avoid later errors -# In practice these would be mainly created using derivation functions from admiral -adsl_raw <- adsl_raw %>% + # Treatment Start and End Date + derive_vars_dtm_to_dt(source_vars = exprs(TRTSDTM, TRTEDTM)) %>% # Convert Datetime variables to date + # Treatment Start Time + derive_vars_dtm_to_tm(source_vars = exprs(TRTSDTM)) %>% + # Treatment Duration + derive_var_trtdurd() %>% + # Safety Population Flag + derive_var_merged_exist_flag( + dataset_add = ex, + by_vars = exprs(STUDYID, USUBJID), + new_var = SAFFL, + condition = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) + ) %>% + drop_unspec_vars(metacore) # This will drop any columns that are not specified in the metacore object + +## ----r treatment_char, eval=TRUE---------------------------------------------- +adsl <- adsl_raw %>% + mutate(TRT01P = if_else(ARM %in% c("Screen Failure", "Not Assigned", "Not Treated"), "No Treatment", ARM), + TRT01A = if_else(ACTARM %in% c("Screen Failure", "Not Assigned", "Not Treated"), "No Treatment", ACTARM) + ) + +## ----r treatment_num, eval=TRUE----------------------------------------------- +adsl <- adsl %>% + create_var_from_codelist(metacore, input_var = TRT01P, out_var = TRT01PN) %>% + create_var_from_codelist(metacore, input_var = TRT01A, out_var = TRT01AN) + +## ----r disposition, eval=TRUE------------------------------------------------- +# Convert character date to numeric date without imputation +ds_ext <- derive_vars_dt( + ds, + dtc = DSSTDTC, + new_vars_prefix = "DSST" +) + +adsl <- adsl %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(EOSDT = DSSTDT), + filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD != "SCREEN FAILURE" + ) + +## ----r eval=TRUE-------------------------------------------------------------- +format_eosstt <- function(x) { + case_when( + x %in% c("COMPLETED") ~ "COMPLETED", + x %in% c("SCREEN FAILURE") ~ NA_character_, + TRUE ~ "DISCONTINUED" + ) +} + +## ----r eval=TRUE-------------------------------------------------------------- +adsl <- adsl %>% + derive_vars_merged( + dataset_add = ds, + by_vars = exprs(STUDYID, USUBJID), + filter_add = DSCAT == "DISPOSITION EVENT", + new_vars = exprs(EOSSTT = format_eosstt(DSDECOD)), + missing_values = exprs(EOSSTT = "ONGOING") + ) + +## ----r eval=TRUE-------------------------------------------------------------- +adsl <- adsl %>% + derive_vars_dt( + new_vars_prefix = "DTH", + dtc = DTHDTC, + highest_imputation = "M", + date_imputation = "first" + ) + +## ----r eval=TRUE-------------------------------------------------------------- +adsl <- adsl %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(RANDDT = DSSTDT), + filter_add = DSDECOD == "RANDOMIZED", + ) %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(SCRFDT = DSSTDT), + filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD == "SCREEN FAILURE" + ) %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(FRVDT = DSSTDT), + filter_add = DSCAT == "OTHER EVENT" & DSDECOD == "FINAL RETRIEVAL VISIT" + ) + +## ----r eval=TRUE-------------------------------------------------------------- +adsl <- adsl %>% + derive_vars_duration( + new_var = DTHADY, + start_date = TRTSDT, + end_date = DTHDT + ) %>% + derive_vars_duration( + new_var = LDDTHELD, + start_date = TRTEDT, + end_date = DTHDT, + add_one = FALSE + ) + +## ----r eval=TRUE-------------------------------------------------------------- +assign_randfl <- function(x) { + if_else(!is.na(x), "Y", NA_character_) +} + +adsl <- adsl %>% mutate( - SITEGR1 = NA, - TRT01PN = NA, - TRT01A = NA, - TRT01AN = NA, - AVGDD = NA, - CUMDOSE = NA, - ITTFL = NA, - EFFFL = NA, - COMP8FL = NA, - COMP16FL = NA, - COMP24FL = NA, - DISCONFL = NA, - DSRAEFL = NA, - BMIBL = NA, - BMIBLGR1 = NA, - HEIGHTBL = NA, - WEIGHTBL = NA, - EDUCLVL = NA, - DISONSDT = NA, - DURDIS = NA, - DURDSGR1 = NA, - VISIT1DT = NA, - VISNUMEN = NA, - RFENDT = NA, - DCDECOD = NA, - EOSSTT = NA, - DCSREAS = NA, - MMSETOT = NA + RANDFL = assign_randfl(RANDDT) ) -## ----r checks, warning=FALSE, message=FALSE----------------------------------- +## ----r death, eval=TRUE------------------------------------------------------- +adsl <- adsl %>% + derive_vars_extreme_event( + by_vars = exprs(STUDYID, USUBJID), + events = list( + event( + dataset_name = "ae", + condition = AEOUT == "FATAL", + set_values_to = exprs(DTHCAUS = AEDECOD, DTHDOM = "AE"), + ), + event( + dataset_name = "ds", + condition = DSDECOD == "DEATH" & grepl("DEATH DUE TO", DSTERM), + set_values_to = exprs(DTHCAUS = DSTERM, DTHDOM = "DS"), + ) + ), + source_datasets = list(ae = ae, ds = ds), + tmp_event_nr_var = event_nr, + order = exprs(event_nr), + mode = "first", + new_vars = exprs(DTHCAUS, DTHDOM) + ) -adsl_raw %>% - check_variables(metacore) %>% # Check all variables specified are present and no more - check_ct_data(metacore, na_acceptable = TRUE) %>% # Checks all variables with CT only contain values within the CT - order_cols(metacore) %>% # Orders the columns according to the spec - sort_by_key(metacore) %>% # Sorts the rows by the sort keys - xportr_type(metacore, domain = "ADSL") %>% # Coerce variable type to match spec - xportr_length(metacore) %>% # Assigns SAS length from a variable level metadata - xportr_label(metacore) %>% # Assigns variable label from metacore specifications - xportr_df_label(metacore) # Assigns dataset label from metacore specifications +## ----r grouping, eval=TRUE---------------------------------------------------- +region1_lookup <- exprs( + ~condition, ~REGION1, ~REGION1N, + COUNTRY %in% c("CAN", "USA"), "North America", 1, + !is.na(COUNTRY), "Rest of the World", 2, + is.na(COUNTRY), "Missing", 3 +) + +racegr1_lookup <- exprs( + ~condition, ~RACEGR1, ~RACEGR1N, + RACE %in% c("WHITE"), "White", 1, + RACE != "WHITE", "Non-white", 2, + is.na(RACE), "Missing", 3 +) + +dthcgr1_lookup <- exprs( + ~condition, ~DTHCGR1, ~DTHCGR1N, + DTHDOM == "AE", "ADVERSE EVENT", 1, + !is.na(DTHDOM) & str_detect(DTHCAUS, "(PROGRESSIVE DISEASE|DISEASE RELAPSE)"), "PROGRESSIVE DISEASE", 2, + !is.na(DTHDOM) & !is.na(DTHCAUS), "OTHER", 3, + is.na(DTHDOM), NA_character_, NA +) + +adsl <- adsl %>% + derive_vars_cat( + definition = region1_lookup + ) %>% + derive_vars_cat( + definition = racegr1_lookup + ) %>% + derive_vars_cat( + definition = dthcgr1_lookup + ) + +## ----r checks, warning=FALSE, message=FALSE----------------------------------- +dir <- tempdir() # Specify the directory for saving the XPT file +adsl %>% + check_variables(metacore) %>% # Check all variables specified are present and no more + check_ct_data(metacore, na_acceptable = TRUE) %>% # Checks all variables with CT only contain values within the CT + order_cols(metacore) %>% # Orders the columns according to the spec + sort_by_key(metacore) %>% # Sorts the rows by the sort keys + xportr_type(metacore, domain = "ADSL") %>% # Coerce variable type to match spec + xportr_length(metacore) %>% # Assigns SAS length from a variable level metadata + xportr_label(metacore) %>% # Assigns variable label from metacore specifications + xportr_df_label(metacore) %>% # Assigns dataset label from metacore specifications + xportr_write(file.path(dir, "adsl.xpt"), metadata = metacore, domain = "ADSL") diff --git a/adam/adsl.qmd b/adam/adsl.qmd index 70c4bf3..7acd107 100644 --- a/adam/adsl.qmd +++ b/adam/adsl.qmd @@ -19,7 +19,8 @@ The four packages used with a brief description of their purpose are as follows: - [`{admiral}`](https://pharmaverse.github.io/admiral/index.html): provides the ADaM derivations. - [`{xportr}`](https://atorus-research.github.io/xportr/): delivers the SAS transport file (XPT) and eSub checks. -It is important to understand `{metacore}` objects by reading through the above linked package site, as these are fundamental to being able to use `{metatools}` and `{xportr}`. Each company may need to build a specification reader to create these objects from their source standard specification templates. +It is important to understand `{metacore}` objects by reading through the above linked package site, as these are fundamental to being able to use `{metatools}` and `{xportr}`. +Each company may need to build a specification reader to create these objects from their source standard specification templates. ## Load Data and Required pharmaverse Packages @@ -37,64 +38,150 @@ library(lubridate) library(stringr) # Read in input SDTM data -data("dm") -data("ex") +dm <- pharmaversesdtm::dm +ds <- pharmaversesdtm::ds +ex <- pharmaversesdtm::ex +ae <- pharmaversesdtm::ae +vs <- pharmaversesdtm::vs +suppdm <- pharmaversesdtm::suppdm + +# When SAS datasets are imported into R using haven::read_sas(), missing +# character values from SAS appear as "" characters in R, instead of appearing +# as NA values. Further details can be obtained via the following link: +# https://pharmaverse.github.io/admiral/articles/admiral.html#handling-of-missing-values +dm <- convert_blanks_to_na(dm) +ds <- convert_blanks_to_na(ds) +ex <- convert_blanks_to_na(ex) +ae <- convert_blanks_to_na(ae) +vs <- convert_blanks_to_na(vs) +suppdm <- convert_blanks_to_na(suppdm) + +# Combine Parent and Supp - very handy! ---- +dm_suppdm <- combine_supp(dm, suppdm) ``` Next we need to load the specification file in the form of a `{metacore}` object. ```{r metacore, warning=FALSE, results='hold'} # Read in metacore object -load(metacore_example("pilot_ADaM.rda")) -metacore <- metacore %>% - select_dataset("ADSL") +metacore <- spec_to_metacore( + path = "./metadata/safety_specs.xlsx", + # All datasets are described in the same sheet + where_sep_sheet = FALSE +) %>% + select_dataset("ADSL") ``` - -Here is an example of how a `{metacore}` object looks showing variable level metadata: - -```{r} -metacore$ds_vars -``` - ## Start Building Derivations The first derivation step we are going to do is to pull through all the columns that come directly from the SDTM datasets. -In this case all the columns come from `DM` so that is the only dataset we will pass into `metatools::build_from_derived()`. The resulting dataset has all the columns combined and any columns that needed renaming between SDTM and ADaM are renamed. +In this case all the columns come from `DM` and `SUPPDM`so that are the only datasets we will pass into `metatools::build_from_derived()`. +The resulting dataset has all the columns combined and any columns that needed renaming between SDTM and ADaM are renamed. ```{r demographics} adsl_preds <- build_from_derived(metacore, - ds_list = list("dm" = dm), - predecessor_only = FALSE, keep = TRUE) + ds_list = list("dm" = dm_suppdm, "suppdm" = dm_suppdm), + predecessor_only = FALSE, keep = FALSE) + head(adsl_preds, n=10) ``` -Now we have the base dataset, we can start to create some variables. We can start with creating the subgroups using the controlled terminology, in this case `AGEGR1`. The metacore object holds all the metadata needed to make `ADSL`. Part of that metadata is the controlled terminology, which can help automate the creation of subgroups. We can look into the `{metacore}` object and see the controlled terminology for `AGEGR1`. +### Grouping variables + +Now we have the base dataset, we can start to create some variables. +There are a few options to create grouping variables and their corresponding numeric variables. + +Option 1: We can start with creating the subgroups using the controlled terminology, in this case `AGEGR1`. +The metacore object holds all the metadata needed to make `ADSL`. Part of that metadata is the controlled terminology, +which can help automate the creation of subgroups. We can look into the `{metacore}` object and see the controlled terminology for `AGEGR1`. ```{r} get_control_term(metacore, variable = AGEGR1) ``` -Because this controlled terminology is written in a fairly standard format we can automate the creation of `AGEGR1`. The function `metatools::create_cat_var()` takes in a `{metacore}` object, a reference variable - in this case `AGE` because that is the continuous variable `AGEGR1` is created from, and the name of the sub-grouped variable. It will take the controlled terminology from the sub-grouped variable and group the reference variables accordingly. - -Using a similar philosophy we can create the numeric version of `RACE` using the controlled terminology stored in the `{metacore}` object with the `metatools::create_var_from_codelist()` function. +Because this controlled terminology is written in a fairly standard format we can automate the creation of `AGEGR1`. +The function `metatools::create_cat_var()` takes in a `{metacore}` object, a reference variable - +in this case `AGE` because that is the continuous variable `AGEGR1` is created from, and the name of the sub-grouped variable. +It will take the controlled terminology from the sub-grouped variable and group the reference variables accordingly. -```{r ct} +```{r grouping_option_1} adsl_ct <- adsl_preds %>% create_cat_var(metacore, ref_var = AGE, - grp_var = AGEGR1, num_grp_var = AGEGR1N) %>% + grp_var = AGEGR1, num_grp_var = AGEGR1N) + +head(adsl_ct, n=10) +``` + +Option 2: We can create the subgroups using the `admiral::derive_vars_cat()` function available since `{admiral}` v1.2.0. +This function is especially useful if more than one variable needs to be created for each condition, e.g., `AGEGR1` and `AGEGR1N`. +Additionally, one needs to be careful when considering the order of the conditions in the lookup table. +The category is assigned based on the first match. That means *catch-all* conditions must come after specific conditions, e.g. `!is.na(AGE)` must come after `between(AGE, 18, 64)`. + +```{r grouping_option_2} +agegr1_lookup <- exprs( + ~condition, ~AGEGR1, ~AGEGR1N, + AGE < 18, "<18", 1, + between(AGE, 18, 64), "18-64", 2, + !is.na(AGE), ">64", 3, + is.na(AGE), "Missing", 4 +) + +adsl_cat <- derive_vars_cat( + dataset = adsl_preds, + definition = agegr1_lookup + ) + +head(adsl_cat %>% select(USUBJID, AGE, AGEU, AGEGR1, AGEGR1N), n=10) +``` + +Option 3: We can also solve this subgroups task with custom functions. + +```{r grouping_option_3} +format_agegr1 <- function(var_input) { + case_when( + var_input < 18 ~ "<18", + between(var_input, 18, 64) ~ "18-64", + var_input > 64 ~ ">64", + TRUE ~ "Missing" + ) +} + +format_agegr1n <- function(var_input) { + case_when( + var_input < 18 ~ 1, + between(var_input, 18, 64) ~ 2, + var_input > 64 ~ 3, + TRUE ~ 4 + ) +} + +adsl_cust <- adsl_preds %>% + mutate( + AGEGR1 = format_agegr1(AGE), + AGEGR1N = format_agegr1n(AGE) + ) + +head(adsl_cust %>% select(USUBJID, AGE, AGEU, AGEGR1, AGEGR1N), n=10) +``` + +Using a similar philosophy we can create the numeric version of `RACE` using the controlled terminology stored in the `{metacore}` object with the `metatools::create_var_from_codelist()` function. + +```{r codelist} +adsl_ct <- adsl_ct %>% create_var_from_codelist(metacore = metacore, input_var = RACE, - out_var = RACEN) %>% - # Removing screen failures from ARM and TRT01P to match the define and FDA guidance - mutate(ARM = if_else(ARM == "Screen Failure", NA_character_, ARM), - TRT01P = if_else(TRT01P == "Screen Failure", NA_character_, TRT01P) - ) + out_var = RACEN) head(adsl_ct, n=10) ``` -Now we have sorted out what we can easily do with controlled terminology it is time to start deriving some variables. Here you could refer directly to using the `{admiral}` template and [vignette](https://pharmaverse.github.io/admiral/cran-release/articles/adsl.html) in practice, but for the purpose of this end-to-end ADaM vignette we will share a few exposure derivations from there. We derive the start and end of treatment (which requires dates to first be converted from DTC to DTM), the treatment duration, and the safety population flag. +### Exposure derivations + +Now we have sorted out what we can easily do with controlled terminology it is time to start deriving some variables. +Here you could refer directly to using the `{admiral}` template and [vignette](https://pharmaverse.github.io/admiral/articles/adsl.html) in practice, +but for the purpose of this end-to-end ADaM vignette we will share a few exposure derivations from there. +We derive the start and end of treatment (which requires dates to first be converted from DTC to DTM), the treatment start time, the treatment duration, and the safety population flag. +Note that the populations flags are mainly company/study specific no dedicated functions are provided, but in most cases they can easily be derived using `admiral::derive_var_merged_exist_flag()`. ```{r exposure} ex_ext <- ex %>% @@ -109,88 +196,333 @@ ex_ext <- ex %>% ) adsl_raw <- adsl_ct %>% + # Treatment Start Datetime derive_vars_merged( dataset_add = ex_ext, filter_add = (EXDOSE > 0 | (EXDOSE == 0 & - str_detect(EXTRT, "PLACEBO"))) & nchar(EXSTDTC) >= 10, - new_vars = exprs(TRTSDTM = EXSTDTM), + str_detect(EXTRT, "PLACEBO"))) & !is.na(EXSTDTM), + new_vars = exprs(TRTSDTM = EXSTDTM, TRTSTMF = EXSTTMF), order = exprs(EXSTDTM, EXSEQ), mode = "first", by_vars = exprs(STUDYID, USUBJID) ) %>% + # Treatment End Datetime derive_vars_merged( dataset_add = ex_ext, filter_add = (EXDOSE > 0 | (EXDOSE == 0 & - str_detect(EXTRT, "PLACEBO"))) & nchar(EXENDTC) >= 10, - new_vars = exprs(TRTEDTM = EXENDTM), + str_detect(EXTRT, "PLACEBO"))) & !is.na(EXENDTM), + new_vars = exprs(TRTEDTM = EXENDTM, TRTETMF = EXENTMF), order = exprs(EXENDTM, EXSEQ), mode = "last", by_vars = exprs(STUDYID, USUBJID) ) %>% - derive_vars_dtm_to_dt(source_vars = exprs(TRTSDTM, TRTEDTM)) %>% # Convert Datetime variables to date - derive_var_trtdurd() %>% - derive_var_merged_exist_flag( + # Treatment Start and End Date + derive_vars_dtm_to_dt(source_vars = exprs(TRTSDTM, TRTEDTM)) %>% # Convert Datetime variables to date + # Treatment Start Time + derive_vars_dtm_to_tm(source_vars = exprs(TRTSDTM)) %>% + # Treatment Duration + derive_var_trtdurd() %>% + # Safety Population Flag + derive_var_merged_exist_flag( dataset_add = ex, by_vars = exprs(STUDYID, USUBJID), new_var = SAFFL, condition = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) - ) %>% - drop_unspec_vars(metacore) # This will drop any columns that aren't specified in the metacore object + ) + +head(adsl_raw %>% select(STUDYID, USUBJID, TRTSDTM, TRTSTM, TRTSTMF, TRTSDT, TRTEDTM, TRTETMF, TRTEDT, TRTDURD, SAFFL), n=10) +``` + +This call returns the original data frame with the column `TRTSDTM`, `TRTSTMF`, `TRTEDTM`, and `TRTETMF` added. +Exposure observations with incomplete date and zero doses of non placebo treatments are ignored. +Missing time parts are imputed as first or last for start and end date respectively. + +### Derive Treatment Variables + +The mapping of the treatment variables is left to the ADaM programmer. An example mapping for a study without periods may be: + +```{r treatment_char, eval=TRUE} +adsl <- adsl_raw %>% + mutate(TRT01P = if_else(ARM %in% c("Screen Failure", "Not Assigned", "Not Treated"), "No Treatment", ARM), + TRT01A = if_else(ACTARM %in% c("Screen Failure", "Not Assigned", "Not Treated"), "No Treatment", ACTARM) + ) + +head(adsl %>% select(STUDYID, USUBJID, TRT01P, TRT01A), n=10) +``` + +For studies with periods see the ["Visit and Period Variables" vignette](https://pharmaverse.github.io/admiral/articles/visits_periods.html#periods_adsl). + +The corresponding numeric variables can be derived using the `metatools` package with the `{metacore}` objects +that we created at the very beginning. The function `metatools::create_var_from_codelist()` is used in below example. + +```{r treatment_num, eval=TRUE} +adsl <- adsl %>% + create_var_from_codelist(metacore, input_var = TRT01P, out_var = TRT01PN) %>% + create_var_from_codelist(metacore, input_var = TRT01A, out_var = TRT01AN) + +head(adsl %>% select(STUDYID, USUBJID, TRT01P, TRT01PN, TRT01A, TRT01AN), n=10) +``` + +### Derive Disposition Variables + +The functions `admiral::derive_vars_dt()` and `admiral::derive_vars_merged()` can be used to derive disposition dates. +First the character disposition date (`DS.DSSTDTC`) is converted to a numeric date (`DSSTDT`) calling `admiral::derive_vars_dt()`. +The `DS` dataset is extended by the `DSSTDT` variable because the date is required by other derivations, e.g., `RANDDT` as well. +Then the relevant disposition date is selected by adjusting the `filter_add` argument. + +To add the End of Study date (`EOSDT`) to the input dataset, a call could be: + +```{r disposition, eval=TRUE} +# Convert character date to numeric date without imputation +ds_ext <- derive_vars_dt( + ds, + dtc = DSSTDTC, + new_vars_prefix = "DSST" +) -head(adsl_raw, n=10) +adsl <- adsl %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(EOSDT = DSSTDT), + filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD != "SCREEN FAILURE" + ) +``` + +The `admiral::derive_vars_dt()` function allows to impute partial dates as well. +If imputation is needed and missing days are to be imputed to the first of the month and +missing months to the first month of the year, set `highest_imputation = "M"`. + + +The End of Study status (`EOSSTT`) based on `DSCAT` and `DSDECOD` from `DS` can be derived +using the function `admiral::derive_vars_merged()`. +The relevant observations are selected by adjusting the `filter_add` argument. +A function mapping `DSDECOD` values to `EOSSTT` values can be defined and used in the +`new_vars` argument. The mapping for the call below is + +- `"COMPLETED"` if `DSDECOD == "COMPLETED"` +- `NA_character_` if `DSDECOD` is `"SCREEN FAILURE"` +- `"DISCONTINUED"` otherwise + +Example function `format_eosstt()`: + +```{r eval=TRUE} +format_eosstt <- function(x) { + case_when( + x %in% c("COMPLETED") ~ "COMPLETED", + x %in% c("SCREEN FAILURE") ~ NA_character_, + TRUE ~ "DISCONTINUED" + ) +} +``` + +The customized mapping function `format_eosstt()` can now be passed to the main +function. For subjects without a disposition event the end of study status is +set to `"ONGOING"` by specifying the `missing_values` argument. + +```{r eval=TRUE} +adsl <- adsl %>% + derive_vars_merged( + dataset_add = ds, + by_vars = exprs(STUDYID, USUBJID), + filter_add = DSCAT == "DISPOSITION EVENT", + new_vars = exprs(EOSSTT = format_eosstt(DSDECOD)), + missing_values = exprs(EOSSTT = "ONGOING") + ) + +head(adsl %>% select(STUDYID, USUBJID, EOSDT, EOSSTT), n=10) +``` + +If the derivation must be changed, the user can create his/her own function to map `DSDECOD` to a suitable `EOSSTT` value. + +The Imputed Death Date (`DTHDT`) can be derived using the `admiral::derive_vars_dt()` function. + +```{r eval=TRUE} +adsl <- adsl %>% + derive_vars_dt( + new_vars_prefix = "DTH", + dtc = DTHDTC, + highest_imputation = "M", + date_imputation = "first" + ) + +head(adsl %>% select(STUDYID, USUBJID, DTHDT, DTHDTF), n=10) ``` -```{r, warning=FALSE, message=FALSE, include=FALSE} -# Create dummy variables to match metacore specs to avoid later errors -# In practice these would be mainly created using derivation functions from admiral -adsl_raw <- adsl_raw %>% +Further dates such as Randomization Date (`RANDDT`), Screen fail date (`SCRFDT`), and Last Retrieval Date (`FRVDT`), +can also be derived using `admiral::derive_vars_merged()` since these are selected dates based on filters and merged back to the original dataset. + +```{r eval=TRUE} +adsl <- adsl %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(RANDDT = DSSTDT), + filter_add = DSDECOD == "RANDOMIZED", + ) %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(SCRFDT = DSSTDT), + filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD == "SCREEN FAILURE" + ) %>% + derive_vars_merged( + dataset_add = ds_ext, + by_vars = exprs(STUDYID, USUBJID), + new_vars = exprs(FRVDT = DSSTDT), + filter_add = DSCAT == "OTHER EVENT" & DSDECOD == "FINAL RETRIEVAL VISIT" + ) + +head(adsl %>% select(STUDYID, USUBJID, RANDDT, SCRFDT, FRVDT), n=10) +``` + +The function `admiral::derive_vars_duration()` can now be used to derive duration relative to death like the Relative +Day of Death (`DTHADY`) or the numbers of days from last dose to death (`LDDTHELD`). + +```{r eval=TRUE} +adsl <- adsl %>% + derive_vars_duration( + new_var = DTHADY, + start_date = TRTSDT, + end_date = DTHDT + ) %>% + derive_vars_duration( + new_var = LDDTHELD, + start_date = TRTEDT, + end_date = DTHDT, + add_one = FALSE + ) + +head(adsl %>% select(STUDYID, USUBJID, DTHDT, TRTSDT, TRTEDT, DTHADY, LDDTHELD), n=10) +``` + +Having the Randomization Date added to the dataset also allows to derive a Population Flag. +Randomized Population Flag (`RANDFL`) can be computed using a customized function. + +```{r eval=TRUE} +assign_randfl <- function(x) { + if_else(!is.na(x), "Y", NA_character_) +} + +adsl <- adsl %>% mutate( - SITEGR1 = NA, - TRT01PN = NA, - TRT01A = NA, - TRT01AN = NA, - AVGDD = NA, - CUMDOSE = NA, - ITTFL = NA, - EFFFL = NA, - COMP8FL = NA, - COMP16FL = NA, - COMP24FL = NA, - DISCONFL = NA, - DSRAEFL = NA, - BMIBL = NA, - BMIBLGR1 = NA, - HEIGHTBL = NA, - WEIGHTBL = NA, - EDUCLVL = NA, - DISONSDT = NA, - DURDIS = NA, - DURDSGR1 = NA, - VISIT1DT = NA, - VISNUMEN = NA, - RFENDT = NA, - DCDECOD = NA, - EOSSTT = NA, - DCSREAS = NA, - MMSETOT = NA + RANDFL = assign_randfl(RANDDT) ) + +head(adsl %>% select(STUDYID, USUBJID, RANDDT, RANDFL), n=10) ``` -## Apply Metadata to Create an eSub XPT and Perform Associated Checks +### Derive Cause of Death + +The cause of death (`DTHCAUS`) can be derived using the function `admiral::derive_vars_extreme_event()`. + +Since the cause of death could be collected/mapped in different domains (e.g. `DS`, `AE`, `DD`), it +is important the user specifies the right source(s) to derive the cause of death from. + +For example, if the date of death is collected in the `AE` form when the AE is Fatal, the cause of +death would be set to the preferred term (`AEDECOD`) of that Fatal AE, while if the date of death is +collected in the `DS` form, the cause of death would be set to the disposition term (`DSTERM`). +To achieve this, the `event()` objects within `derive_vars_extreme_event()` must be specified and defined such that they fit the +study requirement. +The function also offers the option to add some traceability variables (e.g. `DTHDOM` would +store the domain where the date of death is collected, and `DTHSEQ`could also be added to store the `xxSEQ` value of +that domain - but let's keep it simple with `DTHDOM` only). +The traceability variables should be added to the `event()` calls and included in the `new_vars` parameter of `derive_vars_extreme_event()`. + +```{r death, eval=TRUE} +adsl <- adsl %>% + derive_vars_extreme_event( + by_vars = exprs(STUDYID, USUBJID), + events = list( + event( + dataset_name = "ae", + condition = AEOUT == "FATAL", + set_values_to = exprs(DTHCAUS = AEDECOD, DTHDOM = "AE"), + ), + event( + dataset_name = "ds", + condition = DSDECOD == "DEATH" & grepl("DEATH DUE TO", DSTERM), + set_values_to = exprs(DTHCAUS = DSTERM, DTHDOM = "DS"), + ) + ), + source_datasets = list(ae = ae, ds = ds), + tmp_event_nr_var = event_nr, + order = exprs(event_nr), + mode = "first", + new_vars = exprs(DTHCAUS, DTHDOM) + ) -Now we have all the variables defined we can run some checks before applying the necessary formatting. The top four functions performing checks and sorting/ordering come from `{metatools}`, whereas the others focused around applying attributes to prepare for XPT come from `{xportr}`. At the end you could add a call to `xportr::xportr_write()` to produce the XPT file. +head(adsl %>% select(STUDYID, USUBJID, DTHDT, DTHCAUS, DTHDOM) %>% filter(!is.na(DTHDT)), n=10) +``` -```{r checks, warning=FALSE, message=FALSE} +### Derive Grouping Variables + +Following the derivation of `DTHCAUS` and related traceability variables, it is then possible to derive grouping variables such as death categories (`DTHCGRx`), +region categories (`REGIONx`), and race categories (`RACEx`). +As previously seen with `AGEGR1`, the `admiral::derive_vars_cat()` function available from version 1.2.0 can create such groups. + +```{r grouping, eval=TRUE} +region1_lookup <- exprs( + ~condition, ~REGION1, ~REGION1N, + COUNTRY %in% c("CAN", "USA"), "North America", 1, + !is.na(COUNTRY), "Rest of the World", 2, + is.na(COUNTRY), "Missing", 3 +) + +racegr1_lookup <- exprs( + ~condition, ~RACEGR1, ~RACEGR1N, + RACE %in% c("WHITE"), "White", 1, + RACE != "WHITE", "Non-white", 2, + is.na(RACE), "Missing", 3 + ) -adsl_raw %>% - check_variables(metacore) %>% # Check all variables specified are present and no more - check_ct_data(metacore, na_acceptable = TRUE) %>% # Checks all variables with CT only contain values within the CT - order_cols(metacore) %>% # Orders the columns according to the spec - sort_by_key(metacore) %>% # Sorts the rows by the sort keys - xportr_type(metacore, domain = "ADSL") %>% # Coerce variable type to match spec - xportr_length(metacore) %>% # Assigns SAS length from a variable level metadata - xportr_label(metacore) %>% # Assigns variable label from metacore specifications - xportr_df_label(metacore) # Assigns dataset label from metacore specifications +dthcgr1_lookup <- exprs( + ~condition, ~DTHCGR1, ~DTHCGR1N, + DTHDOM == "AE", "ADVERSE EVENT", 1, + !is.na(DTHDOM) & str_detect(DTHCAUS, "(PROGRESSIVE DISEASE|DISEASE RELAPSE)"), "PROGRESSIVE DISEASE", 2, + !is.na(DTHDOM) & !is.na(DTHCAUS), "OTHER", 3, + is.na(DTHDOM), NA_character_, NA +) + + +adsl <- adsl %>% + derive_vars_cat( + definition = region1_lookup + ) %>% + derive_vars_cat( + definition = racegr1_lookup + ) %>% + derive_vars_cat( + definition = dthcgr1_lookup + ) + +head(adsl %>% select(STUDYID, USUBJID, COUNTRY, REGION1, REGION1N, RACE, RACEGR1, RACEGR1N), n=10) +``` + +```{r eval=TRUE} +head(adsl %>% filter(!is.na(DTHDT)) %>% select(STUDYID, USUBJID, DTHDOM, DTHCAUS, DTHCGR1, DTHCGR1N), n=10) +``` + +### Apply Metadata to Create an eSub XPT and Perform Associated Checks + +Now we have all the variables defined we can run some checks before applying the necessary formatting. +The top four functions performing checks and sorting/ordering come from `{metatools}`, whereas +the others focused around applying attributes to prepare for XPT come from `{xportr}`. +At the end you can produce the XPT file calling `xportr::xportr_write()`. + +```{r checks, warning=FALSE, message=FALSE} +dir <- tempdir() # Specify the directory for saving the XPT file + +adsl %>% + check_variables(metacore) %>% # Check all variables specified are present and no more + check_ct_data(metacore, na_acceptable = TRUE) %>% # Checks all variables with CT only contain values within the CT + order_cols(metacore) %>% # Orders the columns according to the spec + sort_by_key(metacore) %>% # Sorts the rows by the sort keys + xportr_type(metacore, domain = "ADSL") %>% # Coerce variable type to match spec + xportr_length(metacore) %>% # Assigns SAS length from a variable level metadata + xportr_label(metacore) %>% # Assigns variable label from metacore specifications + xportr_df_label(metacore) %>% # Assigns dataset label from metacore specifications + xportr_write(file.path(dir, "adsl.xpt"), metadata = metacore, domain = "ADSL") ``` diff --git a/adam/advs.R b/adam/advs.R new file mode 100644 index 0000000..9c07243 --- /dev/null +++ b/adam/advs.R @@ -0,0 +1,332 @@ +## ----r setup, message=FALSE, warning=FALSE, results='hold'-------------------- +library(metacore) +library(metatools) +library(pharmaversesdtm) +library(admiral) +library(xportr) +library(dplyr) +library(tidyr) +library(lubridate) +library(stringr) + +# Read in input data +adsl <- pharmaverseadam::adsl +vs <- pharmaversesdtm::vs + +vs <- convert_blanks_to_na(vs) + +## ----r echo=TRUE-------------------------------------------------------------- +# ---- Load Specs for Metacore ---- +metacore <- spec_to_metacore( + path = "./metadata/safety_specs.xlsx", + where_sep_sheet = FALSE, + quiet = TRUE +) %>% + select_dataset("ADVS") + +## ----r------------------------------------------------------------------------ +# Select required ADSL variables +adsl_vars <- exprs(TRTSDT, TRTEDT, TRT01A, TRT01P) + +# Join ADSL variables with VS +advs <- vs %>% + derive_vars_merged( + dataset_add = adsl, + new_vars = adsl_vars, + by_vars = exprs(STUDYID, USUBJID) + ) + +## ----r------------------------------------------------------------------------ +# Calculate ADT, ADY +advs <- advs %>% + derive_vars_dt( + new_vars_prefix = "A", + dtc = VSDTC, + # Below arguments are default values and not necessary to add in our case + highest_imputation = "n", # means no imputation is performed on partial/missing dates + flag_imputation = "auto" # To automatically create ADTF variable when highest_imputation is "Y", "M" or "D" + ) %>% + derive_vars_dy( + reference_date = TRTSDT, + source_vars = exprs(ADT) + ) + +## ----r eval=TRUE, include=FALSE----------------------------------------------- +param_lookup <- tibble::tribble( + ~VSTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, + "SYSBP", "SYSBP", " Systolic Blood Pressure (mmHg)", 1, + "DIABP", "DIABP", "Diastolic Blood Pressure (mmHg)", 2, + "PULSE", "PULSE", "Pulse Rate (beats/min)", 3, + "WEIGHT", "WEIGHT", "Weight (kg)", 4, + "HEIGHT", "HEIGHT", "Height (cm)", 5, + "TEMP", "TEMP", "Temperature (C)", 6, + "MAP", "MAP", "Mean Arterial Pressure (mmHg)", 7, + "BMI", "BMI", "Body Mass Index(kg/m^2)", 8, + "BSA", "BSA", "Body Surface Area(m^2)", 9 +) +attr(param_lookup$VSTESTCD, "label") <- "Vital Signs Test Short Name" + +## ----r------------------------------------------------------------------------ +advs <- advs %>% + # Add PARAMCD only - add PARAM etc later + derive_vars_merged_lookup( + dataset_add = param_lookup, + new_vars = exprs(PARAMCD), + by_vars = exprs(VSTESTCD), + # Below arguments are default values and not necessary to add in our case + print_not_mapped = TRUE # Printing whether some parameters are not mapped + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + mutate( + AVAL = VSSTRESN, + AVALU = VSSTRESU + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + derive_param_map( + by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM, AVALU), # Other variables than the defined ones here won't be populated + set_values_to = exprs(PARAMCD = "MAP"), + get_unit_expr = VSSTRESU, + filter = VSSTAT != "NOT DONE" | is.na(VSSTAT), + # Below arguments are default values and not necessary to add in our case + sysbp_code = "SYSBP", + diabp_code = "DIABP", + hr_code = NULL + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + derive_param_computed( + by_vars = exprs(STUDYID, USUBJID, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM), + parameters = "WEIGHT", + set_values_to = exprs( + AVAL = AVAL.WEIGHT / (AVAL.HEIGHT / 100)^2, + PARAMCD = "BMI", + AVALU = "kg/m^2" + ), + constant_parameters = c("HEIGHT"), + constant_by_vars = exprs(USUBJID) + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + derive_param_bsa( + by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM), + method = "Mosteller", + set_values_to = exprs( + PARAMCD = "BSA", + AVALU = "m^2" + ), + get_unit_expr = VSSTRESU, + filter = VSSTAT != "NOT DONE" | is.na(VSSTAT), + constant_by_vars = exprs(USUBJID), + # Below arguments are default values and not necessary to add in our case + height_code = "HEIGHT", + weight_code = "WEIGHT" + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + mutate( + ATPTN = VSTPTNUM, + ATPT = VSTPT, + AVISIT = case_when( + str_detect(VISIT, "SCREEN|UNSCHED|RETRIEVAL|AMBUL") ~ NA_character_, + !is.na(VISIT) ~ str_to_title(VISIT), + TRUE ~ NA_character_ + ), + AVISITN = as.numeric(case_when( + VISIT == "BASELINE" ~ "0", + str_detect(VISIT, "WEEK") ~ str_trim(str_replace(VISIT, "WEEK", "")), + TRUE ~ NA_character_ + )) + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- derive_summary_records( + dataset = advs, + dataset_add = advs, # Observations from the specified dataset are going to be used to calculate and added as new records to the input dataset. + by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, PARAMCD, AVISITN, AVISIT, ADT, ADY, AVALU), + filter_add = !is.na(AVAL), + set_values_to = exprs( + AVAL = mean(AVAL), + DTYPE = "AVERAGE" + ) +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- derive_var_ontrtfl( + advs, + start_date = ADT, + ref_start_date = TRTSDT, + ref_end_date = TRTEDT, + filter_pre_timepoint = toupper(AVISIT) == "BASELINE" # Observations as not on-treatment +) + +## ----r include=FALSE---------------------------------------------------------- +range_lookup <- tibble::tribble( + ~PARAMCD, ~ANRLO, ~ANRHI, ~A1LO, ~A1HI, + "SYSBP", 90, 130, 70, 140, + "DIABP", 60, 80, 40, 90, + "PULSE", 60, 100, 40, 110, + "TEMP", 36.5, 37.5, 35, 38 +) + +advs <- derive_vars_merged( + advs, + dataset_add = range_lookup, + by_vars = exprs(PARAMCD) +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- derive_var_anrind( + advs, + # Below arguments are default values and not necessary to add in our case + signif_dig = get_admiral_option("signif_digits"), + use_a1hia1lo = FALSE +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- derive_basetype_records( + dataset = advs, + basetypes = exprs( + "LAST: AFTER LYING DOWN FOR 5 MINUTES" = ATPTN == 815, + "LAST: AFTER STANDING FOR 1 MINUTE" = ATPTN == 816, + "LAST: AFTER STANDING FOR 3 MINUTES" = ATPTN == 817, + "LAST" = is.na(ATPTN) + ) +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- restrict_derivation( + advs, + derivation = derive_var_extreme_flag, + args = params( + by_vars = exprs(STUDYID, USUBJID, BASETYPE, PARAMCD), + order = exprs(ADT, VISITNUM, VSSEQ), + new_var = ABLFL, + mode = "last", # Determines of the first or last observation is flagged + # Below arguments are default values and not necessary to add in our case + true_value = "Y" + ), + filter = (!is.na(AVAL) & + ADT <= TRTSDT & !is.na(BASETYPE) & is.na(DTYPE) + ) +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- derive_var_base( + advs, + by_vars = exprs(STUDYID, USUBJID, PARAMCD, BASETYPE), + source_var = AVAL, + new_var = BASE, + # Below arguments are default values and not necessary to add in our case + filter = ABLFL == "Y" +) + +advs <- derive_var_base( + advs, + by_vars = exprs(STUDYID, USUBJID, PARAMCD, BASETYPE), + source_var = ANRIND, + new_var = BNRIND +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- restrict_derivation( + advs, + derivation = derive_var_chg, + filter = AVISITN > 0 +) + +advs <- restrict_derivation( + advs, + derivation = derive_var_pchg, + filter = AVISITN > 0 +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- restrict_derivation( + advs, + derivation = derive_var_extreme_flag, + args = params( + new_var = ANL01FL, + by_vars = exprs(STUDYID, USUBJID, PARAMCD, AVISIT, ATPT, DTYPE), + order = exprs(ADT, AVAL), + mode = "last", # Determines of the first or last observation is flagged - As seen while deriving ABLFL + # Below arguments are default values and not necessary to add in our case + true_value = "Y" + ), + filter = !is.na(AVISITN) & ONTRTFL == "Y" +) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + mutate( + TRTP = TRT01P, + TRTA = TRT01A + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- derive_var_obs_number( + advs, + new_var = ASEQ, + by_vars = exprs(STUDYID, USUBJID), + order = exprs(PARAMCD, ADT, AVISITN, VISITNUM, ATPTN, DTYPE), + check_type = "error" +) + +## ----r eval=TRUE-------------------------------------------------------------- +avalcat_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, ~AVALCA1N, + "HEIGHT", AVAL > 140, ">140 cm", 1, + "HEIGHT", AVAL <= 140, "<= 140 cm", 2 +) + +advs <- advs %>% + derive_vars_cat( + definition = avalcat_lookup, + by_vars = exprs(PARAMCD) + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + create_var_from_codelist( + metacore, + input_var = PARAMCD, + out_var = PARAM, + decode_to_code = FALSE # input_var is the code column of the codelist + ) %>% + create_var_from_codelist( + metacore, + input_var = PARAMCD, + out_var = PARAMN + ) + +## ----r eval=TRUE-------------------------------------------------------------- +advs <- advs %>% + derive_vars_merged( + dataset_add = select(adsl, !!!negate_vars(adsl_vars)), + by_vars = exprs(STUDYID, USUBJID) + ) + +## ----r, message=FALSE, warning=FALSE------------------------------------------ +dir <- tempdir() # Specify the directory for saving the XPT file + +# Apply metadata and perform checks +advs_prefinal <- advs %>% + drop_unspec_vars(metacore) %>% # Drop unspecified variables from specs + check_variables(metacore, dataset_name = "ADVS") %>% # Check all variables specified are present and no more + order_cols(metacore) %>% # Orders the columns according to the spec + sort_by_key(metacore) # Sorts the rows by the sort keys + +# Apply apply labels, formats, and export the dataset to an XPT file. +advs_final <- advs_prefinal %>% + xportr_type(metacore) %>% + xportr_length(metacore) %>% + xportr_label(metacore) %>% + xportr_format(metacore, domain = "ADVS") %>% + xportr_df_label(metacore, domain = "ADVS") %>% + xportr_write(file.path(dir, "advs.xpt"), metadata = metacore, domain = "ADVS") diff --git a/adam/advs.qmd b/adam/advs.qmd new file mode 100644 index 0000000..4727305 --- /dev/null +++ b/adam/advs.qmd @@ -0,0 +1,624 @@ +--- +title: "ADVS" +order: 6 +--- + +```{r setup script, include=FALSE, purl=FALSE} +invisible_hook_purl <- function(before, options, ...) {knitr::hook_purl(before, options, ...); NULL} +knitr::knit_hooks$set(purl = invisible_hook_purl) +``` + +# Introduction + +This article provides a step-by-step explanation for creating an ADaM `ADVS` (Vital Signs) dataset using key pharmaverse packages along with tidyverse components. + +For the purpose of this example, we will use the `ADSL` dataset from `{pharmaverseadam}` and `vs` domain from `{pharmaversesdtm}`. + +# Load Data and Required pharmaverse Packages + +First we will load the packages required for our project. We will use `{admiral}` for the creation of analysis data. `{admiral}` requires `{dplyr}`, `{lubridate}` and `{stringr}`. We will use `{metacore}` and `{metatools}` to store and manipulate metadata from our specifications. We will use `{xportr}` to perform checks on the final data and export to a transport file. + +Then we will load our input data. + +```{r setup, message=FALSE, warning=FALSE, results='hold'} +library(metacore) +library(metatools) +library(pharmaversesdtm) +library(admiral) +library(xportr) +library(dplyr) +library(tidyr) +library(lubridate) +library(stringr) + +# Read in input data +adsl <- pharmaverseadam::adsl +vs <- pharmaversesdtm::vs + +vs <- convert_blanks_to_na(vs) +``` + +# Load Specifications for Metacore + +We have saved our specifications in an Excel file and will load them into `{metacore}` with the `metacore::spec_to_metacore()` function. + +```{r echo=TRUE} +#| label: Load Specs +#| warning: false +# ---- Load Specs for Metacore ---- +metacore <- spec_to_metacore( + path = "./metadata/safety_specs.xlsx", + # All datasets are described in the same sheet + where_sep_sheet = FALSE +) %>% + select_dataset("ADVS") +``` + +# Select ADSL Variables + +Some variables from the `ADSL` dataset required for the derivations are merged into the `VS` domain using the `admiral::derive_vars_merged()` function. +The rest of the relevant `ADSL` variables would be added later. + +```{r} +# Select required ADSL variables +adsl_vars <- exprs(TRTSDT, TRTEDT, TRT01A, TRT01P) + +# Join ADSL variables with VS +advs <- vs %>% + derive_vars_merged( + dataset_add = adsl, + new_vars = adsl_vars, + by_vars = exprs(STUDYID, USUBJID) + ) + +head(advs, n=10) +``` + +# Start Building Derivations + +The first derivation step we are going to do is to compute the Analysis Date and Relative Analysis Day with the variables merged from `ADSL` dataset. +The resulting dataset has the 2 columns created. + +```{r} +# Calculate ADT, ADY +advs <- advs %>% + derive_vars_dt( + new_vars_prefix = "A", + dtc = VSDTC, + # Below arguments are default values and not necessary to add in our case + highest_imputation = "n", # means no imputation is performed on partial/missing dates + flag_imputation = "auto" # To automatically create ADTF variable when highest_imputation is "Y", "M" or "D" + ) %>% + derive_vars_dy( + reference_date = TRTSDT, + source_vars = exprs(ADT) + ) + +head(advs %>% select(STUDYID, USUBJID, VISIT, VISITNUM, VSTESTCD, VSTEST, VSDTC, !!!adsl_vars, ADT, ADY), n=10) +``` + +## Assign `PARAMCD`, `PARAM`, `PARAMN` + +To assign parameter level values such as `PARAMCD`, `PARAM`, `PARAMN`, +etc., a lookup can be created to join to the source data. + +For example, when creating `ADVS`, a lookup based on the SDTM `--TESTCD` value +may be created: + +`VSTESTCD` | `PARAMCD` | `PARAM` | `PARAMN` +--------- | --------- | -------- | ------- +SYSBP | SYSBP | Systolic Blood Pressure (mmHg) | 1 +DIABP | DIABP | Diastolic Blood Pressure (mmHg) | 2 +PULSE | PULSE | Pulse Rate (beats/min) | 3 +WEIGHT | WEIGHT | Weight (kg) | 4 +HEIGHT | HEIGHT | Height (cm) | 5 +TEMP | TEMP | Temperature (C) | 6 +MAP | MAP | Mean Arterial Pressure | 7 +BMI | BMI | Body Mass Index(kg/m^2) | 8 +BSA | BSA | Body Surface Area(m^2) | 9 + +This lookup may now be joined to the source data: + +```{r eval=TRUE, include=FALSE} +param_lookup <- tibble::tribble( + ~VSTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, + "SYSBP", "SYSBP", " Systolic Blood Pressure (mmHg)", 1, + "DIABP", "DIABP", "Diastolic Blood Pressure (mmHg)", 2, + "PULSE", "PULSE", "Pulse Rate (beats/min)", 3, + "WEIGHT", "WEIGHT", "Weight (kg)", 4, + "HEIGHT", "HEIGHT", "Height (cm)", 5, + "TEMP", "TEMP", "Temperature (C)", 6, + "MAP", "MAP", "Mean Arterial Pressure (mmHg)", 7, + "BMI", "BMI", "Body Mass Index(kg/m^2)", 8, + "BSA", "BSA", "Body Surface Area(m^2)", 9 +) +attr(param_lookup$VSTESTCD, "label") <- "Vital Signs Test Short Name" +``` + +At this stage, only `PARAMCD` is required to perform the derivations. Additional +derived parameters may be added, so only `PARAMCD` is joined to the datasets at +this point. All other variables related to `PARAMCD` (e.g. `PARAM`, `PARAMN`, ...) +will be added when all `PARAMCD` are derived. + +```{r} +advs <- advs %>% + # Add PARAMCD only - add PARAM etc later + derive_vars_merged_lookup( + dataset_add = param_lookup, + new_vars = exprs(PARAMCD), + by_vars = exprs(VSTESTCD), + # Below arguments are default values and not necessary to add in our case + print_not_mapped = TRUE # Printing whether some parameters are not mapped + ) + +head(advs %>% select(STUDYID, USUBJID, VISIT, VISITNUM, VSTESTCD, VSTEST, VSDTC, !!!adsl_vars, ADT, ADY, PARAMCD), n=10) +``` + +## Derive Results and Units (`AVAL`, `AVALU`) + +The mapping of `AVAL` and `AVALU` is left to the ADaM programmer. An +example mapping may be: + +```{r eval=TRUE} +advs <- advs %>% + mutate( + AVAL = VSSTRESN, + AVALU = VSSTRESU + ) + +head(advs %>% select(STUDYID, USUBJID, VISIT, VISITNUM, VSTESTCD, VSTEST, VSDTC, !!!adsl_vars, ADT, ADY, PARAMCD, AVAL, AVALU), n=10) +``` + +In this example, as is often the case for ADVS, all `AVAL` values are numeric without any corresponding non-redundant text value for `AVALC`. +Per recommendation in ADaMIG v1.3 we do not map `AVALC`. + +## Derive Additional Parameters (e.g. `MAP`, `BMI` or `BSA` for `ADVS`) + +Optionally derive new parameters creating `PARAMCD` and `AVAL`. Note that only +variables specified in the `by_vars` argument will be populated in the newly +created records. This is relevant to the functions `admiral::derive_param_map`, +`admiral::derive_param_bsa`, `admiral::derive_param_bmi`, and `admiral::derive_param_qtc`. + +Below is an example of creating `Mean Arterial Pressure` for `ADVS` using the wrapper function `admiral::derive_param_map()` + +```{r eval=TRUE} +advs <- advs %>% + derive_param_map( + by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM, AVALU), # Other variables than the defined ones here won't be populated + set_values_to = exprs(PARAMCD = "MAP"), + get_unit_expr = VSSTRESU, + filter = VSSTAT != "NOT DONE" | is.na(VSSTAT), + # Below arguments are default values and not necessary to add in our case + sysbp_code = "SYSBP", + diabp_code = "DIABP", + hr_code = NULL + ) +``` + +Similarly we could create `Body Mass Index` (BMI) for `ADVS` using the wrapper function `admiral::derive_param_bmi()`, instead we will see in below example +how to use the more generic function `admiral::derive_param_computed()` +Note that if height is collected only once use `constant_parameters` to define the corresponding parameter which will be merged to the other parameters and `constant_by_vars` to specify the subject-level variable to merge on. +Otherwise BMI is only calculated for visits where both parameters `HEIGHT` and `WEIGHT` are collected. + +```{r eval=TRUE} +advs <- advs %>% + derive_param_computed( + by_vars = exprs(STUDYID, USUBJID, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM), + parameters = "WEIGHT", + set_values_to = exprs( + AVAL = AVAL.WEIGHT / (AVAL.HEIGHT / 100)^2, + PARAMCD = "BMI", + AVALU = "kg/m^2" + ), + constant_parameters = c("HEIGHT"), + constant_by_vars = exprs(USUBJID) + ) +``` + +Likewise, wrapper function `admiral::derive_param_bsa()` call below, to create parameter `Body Surface Area` (BSA) for `ADVS` domain. +Note that if height is collected only once use `constant_by_vars` to specify the subject-level variable to merge on. +Otherwise BSA is only calculated for visits where both parameters `HEIGHT` and `WEIGHT` are collected. + +```{r eval=TRUE} +advs <- advs %>% + derive_param_bsa( + by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM), + method = "Mosteller", + set_values_to = exprs( + PARAMCD = "BSA", + AVALU = "m^2" + ), + get_unit_expr = VSSTRESU, + filter = VSSTAT != "NOT DONE" | is.na(VSSTAT), + constant_by_vars = exprs(USUBJID), + # Below arguments are default values and not necessary to add in our case + height_code = "HEIGHT", + weight_code = "WEIGHT" + ) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "MAP") %>% select(STUDYID, USUBJID, VSTESTCD, PARAMCD, VISIT, VSTPT, AVAL, AVALU), n=10) +``` +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "BMI") %>% select(STUDYID, USUBJID, VSTESTCD, PARAMCD, VISIT, VSTPT, AVAL, AVALU), n=10) +``` +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "BSA") %>% select(STUDYID, USUBJID, VSTESTCD, PARAMCD, VISIT, VSTPT, AVAL, AVALU), n=10) +``` + +## Derive Timing Variables (e.g. `AVISIT`, `ATPT`, `ATPTN`) + +Categorical timing variables are protocol and analysis dependent. Below is a simple example. + +```{r eval=TRUE} +advs <- advs %>% + mutate( + ATPTN = VSTPTNUM, + ATPT = VSTPT, + AVISIT = case_when( + str_detect(VISIT, "SCREEN|UNSCHED|RETRIEVAL|AMBUL") ~ NA_character_, + !is.na(VISIT) ~ str_to_title(VISIT), + TRUE ~ NA_character_ + ), + AVISITN = as.numeric(case_when( + VISIT == "BASELINE" ~ "0", + str_detect(VISIT, "WEEK") ~ str_trim(str_replace(VISIT, "WEEK", "")), + TRUE ~ NA_character_ + )) + ) +``` + +For assigning visits based on time windows and deriving periods, subperiods, and phase variables see +the ["Visit and Period Variables" vignette](visits_periods.html). + +## Derive summary records (e.g. mean of the triplicates at each time point) ---- + +For adding new records based on aggregating records `admiral::derive_summary_records()` +can be used. For the new records only the variables specified by `by_vars` and +`set_values_to` are populated. + +For each subject, Vital Signs parameter, visit, and date add a record holding +the average value for observations on that date. +Set `DTYPE` to `AVERAGE`. + +```{r eval=TRUE} +advs <- derive_summary_records( + dataset = advs, + dataset_add = advs, # Observations from the specified dataset are going to be used to calculate and added as new records to the input dataset. + by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, PARAMCD, AVISITN, AVISIT, ADT, ADY, AVALU), + filter_add = !is.na(AVAL), + set_values_to = exprs( + AVAL = mean(AVAL), + DTYPE = "AVERAGE" + ) +) +``` + +## Timing Flag Variables (e.g. `ONTRTFL`) + +In some analyses, it may be necessary to flag an observation as on-treatment. +The admiral function `admiral::derive_var_ontrtfl()` can be used. + +For example, if on-treatment is defined as any observation between treatment +start and treatment end, the flag may be derived as: + +```{r eval=TRUE} +advs <- derive_var_ontrtfl( + advs, + start_date = ADT, + ref_start_date = TRTSDT, + ref_end_date = TRTEDT, + filter_pre_timepoint = toupper(AVISIT) == "BASELINE" # Observations as not on-treatment + ) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "DIABP" & toupper(VISIT) == "WEEK 2") %>% select(USUBJID, PARAMCD, ADT, TRTSDT, TRTEDT, ONTRTFL), n=10) +``` + +## Assign Reference Range Indicator (`ANRIND`) + +The admiral function `derive_var_anrind()` may be used to derive the reference +range indicator `ANRIND`. + +This function requires the reference range boundaries to exist on the data frame +(`ANRLO`, `ANRHI`) and also accommodates the additional boundaries `A1LO` and `A1HI`. + +```{r include=FALSE} +range_lookup <- tibble::tribble( + ~PARAMCD, ~ANRLO, ~ANRHI, ~A1LO, ~A1HI, + "SYSBP", 90, 130, 70, 140, + "DIABP", 60, 80, 40, 90, + "PULSE", 60, 100, 40, 110, + "TEMP", 36.5, 37.5, 35, 38 +) + +advs <- derive_vars_merged( + advs, + dataset_add = range_lookup, + by_vars = exprs(PARAMCD) +) +``` + +The function is called as: + +```{r eval=TRUE} +advs <- derive_var_anrind( + advs, + # Below arguments are default values and not necessary to add in our case + signif_dig = get_admiral_option("signif_digits"), + use_a1hia1lo = FALSE +) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "DIABP" & toupper(VISIT) == "WEEK 2") %>% select(USUBJID, PARAMCD, AVAL, ANRLO, ANRHI, A1LO, A1HI, ANRIND), n=10) +``` + +## Derive Baseline (`BASETYPE`, `ABLFL`, `BASE`, `BNRIND`) + +The `BASETYPE` should be derived using the function `admiral::derive_basetype_records()`. +The parameter `basetypes` of this function requires a named list of expression +detailing how the `BASETYPE` should be assigned. Note, if a record falls into +multiple expressions within the basetypes expression, a row will be produced for +each `BASETYPE`. + +```{r eval=TRUE} +advs <- derive_basetype_records( + dataset = advs, + basetypes = exprs( + "LAST: AFTER LYING DOWN FOR 5 MINUTES" = ATPTN == 815, + "LAST: AFTER STANDING FOR 1 MINUTE" = ATPTN == 816, + "LAST: AFTER STANDING FOR 3 MINUTES" = ATPTN == 817, + "LAST" = is.na(ATPTN) + ) +) + +count(advs, ATPT, ATPTN, BASETYPE) +``` + +It is important to derive `BASETYPE` first so that it can be utilized in +subsequent derivations. This will be important if the data frame contains +multiple values for `BASETYPE`. + +Next, the analysis baseline flag `ABLFL` can be derived using the `{admiral}` +function `admiral::derive_var_extreme_flag()`. For example, if baseline is defined as the last +non-missing `AVAL` prior or on `TRTSDT`, the function call for `ABLFL` would be: + +```{r eval=TRUE} +advs <- restrict_derivation( + advs, + derivation = derive_var_extreme_flag, + args = params( + by_vars = exprs(STUDYID, USUBJID, BASETYPE, PARAMCD), + order = exprs(ADT, VISITNUM, VSSEQ), + new_var = ABLFL, + mode = "last", # Determines of the first or last observation is flagged + # Below arguments are default values and not necessary to add in our case + true_value = "Y" + ), + filter = (!is.na(AVAL) & + ADT <= TRTSDT & !is.na(BASETYPE) & is.na(DTYPE) + ) +) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "DIABP" & toupper(VISIT) %in% c("WEEK 2", "BASELINE")) %>% select(USUBJID, BASETYPE, PARAMCD, ADT, TRTSDT, ATPTN, TRTSDT, ABLFL), n=30) +``` + +Lastly, the `BASE`, and `BNRIND` columns can be derived using the `{admiral}` function +`admiral::derive_var_base()`. Example calls are: + +```{r eval=TRUE} +advs <- derive_var_base( + advs, + by_vars = exprs(STUDYID, USUBJID, PARAMCD, BASETYPE), + source_var = AVAL, + new_var = BASE, + # Below arguments are default values and not necessary to add in our case + filter = ABLFL == "Y" +) + +advs <- derive_var_base( + advs, + by_vars = exprs(STUDYID, USUBJID, PARAMCD, BASETYPE), + source_var = ANRIND, + new_var = BNRIND +) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "DIABP" & toupper(VISIT) %in% c("WEEK 2", "BASELINE")) %>% select(USUBJID, BASETYPE, PARAMCD, ABLFL, BASE, ANRIND, BNRIND), n=10) +``` + +## Derive Change from Baseline (`CHG`, `PCHG`) + +Change and percent change from baseline can be derived using the `{admiral}` +functions `admiral::derive_var_chg()` and `admiral::derive_var_pchg()`. These functions expect `AVAL` +and `BASE` to exist in the data frame. The `CHG` is simply `AVAL - BASE` and the +`PCHG` is `(AVAL - BASE) / absolute value (BASE) * 100`. +If the variables should not be derived for all records, e.g., for post-baseline +records only, `admiral::restrict_derivation()` can be used. +Examples calls are: + +```{r eval=TRUE} +advs <- restrict_derivation( + advs, + derivation = derive_var_chg, + filter = AVISITN > 0 +) + +advs <- restrict_derivation( + advs, + derivation = derive_var_pchg, + filter = AVISITN > 0 +) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "DIABP" & toupper(VISIT) %in% c("WEEK 2", "WEEK 8")) %>% select(USUBJID, PARAMCD, VISIT, BASE, AVAL, CHG, PCHG), n=30) +``` + +## Derive Analysis Flags (e.g. `ANL01FL`) + +In most finding ADaMs, an analysis flag is derived to identify the appropriate +observation(s) to use for a particular analysis when a subject has multiple +observations within a particular timing period. + +In this situation, an analysis flag (e.g. `ANLzzFL`) may be used to choose the +appropriate record for analysis. + +This flag may be derived using the `{admiral}` function `admiral::derive_var_extreme_flag()`. +For this example, we will assume we would like to choose within the Post-Baseline records the latest and +highest value by `USUBJID`, `PARAMCD`, `AVISIT`, and `ATPT`. + +```{r eval=TRUE} +advs <- restrict_derivation( + advs, + derivation = derive_var_extreme_flag, + args = params( + new_var = ANL01FL, + by_vars = exprs(STUDYID, USUBJID, PARAMCD, AVISIT, ATPT, DTYPE), + order = exprs(ADT, AVAL), + mode = "last", # Determines of the first or last observation is flagged - As seen while deriving ABLFL + # Below arguments are default values and not necessary to add in our case + true_value = "Y" + ), + filter = !is.na(AVISITN) & ONTRTFL == "Y" +) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "DIABP" & toupper(VISIT) %in% c("WEEK 2", "WEEK 8")) %>% select(USUBJID, PARAMCD, AVISIT, ATPTN, ADT, AVAL, ANL01FL), n=30) +``` + +## Assign Treatment (`TRTA`, `TRTP`) + +`TRTA` and `TRTP` must match at least one value of the character treatment +variables in ADSL (e.g., `TRTxxA`/`TRTxxP`, `TRTSEQA`/`TRTSEQP`, +`TRxxAGy`/`TRxxPGy`). + +An example of a simple implementation for a study without periods could be: + +```{r eval=TRUE} +advs <- advs %>% + mutate( + TRTP = TRT01P, + TRTA = TRT01A + ) + +count(advs, TRTP, TRTA, TRT01P, TRT01A) +``` + +For studies with periods see the ["Visit and Period Variables" vignette](visits_periods.html#treatment_bds). + +## Assign `ASEQ` + +The `{admiral}` function `admiral::derive_var_obs_number()` can be used to derive `ASEQ`. An +example call is: + +```{r eval=TRUE} +advs <- derive_var_obs_number( + advs, + new_var = ASEQ, + by_vars = exprs(STUDYID, USUBJID), + order = exprs(PARAMCD, ADT, AVISITN, VISITNUM, ATPTN, DTYPE), + check_type = "error" +) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(USUBJID == "01-701-1015") %>% select(USUBJID, PARAMCD, ADT, AVISITN, ATPTN, VISIT, ADT, ASEQ), n=30) +``` + +## Derive Categorization Variables (`AVALCATy`) + +We can use the `admiral::derive_vars_cat()` function to derive the categorization variables. + +```{r eval=TRUE} +avalcat_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, ~AVALCA1N, + "HEIGHT", AVAL > 140, ">140 cm", 1, + "HEIGHT", AVAL <= 140, "<= 140 cm", 2 +) + +advs <- advs %>% + derive_vars_cat( + definition = avalcat_lookup, + by_vars = exprs(PARAMCD) + ) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(PARAMCD == "HEIGHT") %>% select(USUBJID, PARAMCD, AVAL, AVALCA1N, AVALCAT1), n=30) +``` + +## Assign Parameter Level Values (`PARAM`, `PARAMN`) + +When all `PARAMCD` have been derived and added to the dataset, the other information +from the look-up table (`PARAM`, `PARAMN`,...) should be added using `admiral::derive_vars_merged()` function. + +The other way to assign the parameter level values is using the `metatools` package with the `{metacore}` objects +that we created at the beginning. The function `metatools::create_var_from_codelist()` is used in below exemple. + +```{r eval=TRUE} +advs <- advs %>% + create_var_from_codelist( + metacore, + input_var = PARAMCD, + out_var = PARAM, + decode_to_code = FALSE # input_var is the code column of the codelist + ) %>% + create_var_from_codelist( + metacore, + input_var = PARAMCD, + out_var = PARAMN + ) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(USUBJID == "01-716-1024") %>% select(USUBJID, VSTESTCD, PARAMCD, PARAM, PARAMN), n=30) +``` + +## Add ADSL variables + +If needed, the other `ADSL` variables can now be added. +List of ADSL variables already merged held in vector `adsl_vars` + +```{r eval=TRUE} +advs <- advs %>% + derive_vars_merged( + dataset_add = select(adsl, !!!negate_vars(adsl_vars)), + by_vars = exprs(STUDYID, USUBJID) + ) +``` + +```{r, eval=TRUE, echo=FALSE} +head(advs %>% filter(USUBJID == "01-701-1015") %>% select(USUBJID, RFSTDTC, RFENDTC, DTHDTC, DTHFL, AGE, AGEU), n=30) +``` + +# Apply Metadata and eSub Checks + +We use `{metatools}` and `{xportr}` to perform checks, apply metadata such as types, lengths, labels, and write the dataset to an XPT file. + +```{r, message=FALSE, warning=FALSE} +dir <- tempdir() # Specify the directory for saving the XPT file + +# Apply metadata and perform checks +advs_prefinal <- advs %>% + drop_unspec_vars(metacore) %>% # Drop unspecified variables from specs + check_variables(metacore, dataset_name = "ADVS") %>% # Check all variables specified are present and no more + order_cols(metacore) %>% # Orders the columns according to the spec + sort_by_key(metacore) # Sorts the rows by the sort keys + +# Apply apply labels, formats, and export the dataset to an XPT file. +advs_final <- advs_prefinal %>% + xportr_type(metacore) %>% + xportr_length(metacore) %>% + xportr_label(metacore) %>% + xportr_format(metacore, domain = "ADVS") %>% + xportr_df_label(metacore, domain = "ADVS") %>% + xportr_write(file.path(dir, "advs.xpt"), metadata = metacore, domain = "ADVS") +``` diff --git a/metadata/safety_specs.xlsx b/metadata/safety_specs.xlsx new file mode 100644 index 0000000..cdcbf6e Binary files /dev/null and b/metadata/safety_specs.xlsx differ