-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathf - long_nhanes_subset_function.R
98 lines (80 loc) · 5.01 KB
/
f - long_nhanes_subset_function.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
########################################## CREATE THE LONG DATASET ##########################################
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Purpose: This function creates and returns the long nhanes_subset_dataset that I'll be using for analysis
#
# Inputs: nhanes_subset - dataframe containing all variables including cell types, chemicals,
# and covariates (nhanes_subset_dataset)
# subset_chemicals - dataframe of chemicals to include for analysis
#
# Outputs: long_nhanes_subset_dataset - dataframe of subsetted covariates, cell types, and chemicals
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
long_nhanes_subset_function <- function(nhanes_subset,
                                        subset_chemicals) {
  # Reshape the wide NHANES subset into a long dataset and log2-transform it.
  #
  # Args:
  #   nhanes_subset:    wide data frame holding the chemical columns named in
  #                     `subset_chemicals`, the cell-type columns listed in
  #                     `immune` below, and SMOKING, URXUCR, RIAGENDR,
  #                     RIDRETH1 and SDDSRVYR.
  #   subset_chemicals: data frame whose `chemical_codename_use` column names
  #                     the chemical columns of `nhanes_subset` to pivot.
  #
  # Returns:
  #   A tibble with one row per (input row, chemical, cell type) for every
  #   non-missing chemical measurement, with columns `chemical_codename`,
  #   `chem_log_measurement`, `celltype_codename` and `cell_measurement`
  #   replacing the pivoted wide columns.
  #
  # Side effects: attaches tidyverse and magrittr, and prints the structure of
  # the result with str().

  # Attached inside the function, as the original did, so callers that rely on
  # these packages being on the search path afterwards keep working.
  library(tidyverse)
  library(magrittr)

  # `[[` matches the column name exactly and returns NULL when it is absent,
  # unlike `$`, which partial-matches on base data frames.
  chems <- subset_chemicals[["chemical_codename_use"]]
  if (is.null(chems)) {
    stop("`subset_chemicals` must contain a `chemical_codename_use` column.",
         call. = FALSE)
  }

  # Cell-type columns that are stacked into celltype_codename/cell_measurement.
  immune <- c(
    "LBDLYMNO", # lymphocyte counts
    "LBDMONO",  # monocyte counts
    "LBDNENO",  # neutrophil counts
    "LBDEONO",  # eosinophil counts
    "LBDBANO",  # basophil counts
    "LBXWBCSI", # WBC count
    "LBXRBCSI", # RBC count
    "LBXMCVSI"  # MCV
  )

  # Fail early with one clear message instead of an obscure error from deep
  # inside the pipeline when a hard-coded column is absent.
  required_columns <- c(immune, "SMOKING", "URXUCR", "RIAGENDR", "RIDRETH1",
                        "SDDSRVYR")
  missing_columns <- setdiff(required_columns, names(nhanes_subset))
  if (length(missing_columns) > 0) {
    stop("`nhanes_subset` is missing required column(s): ",
         paste(missing_columns, collapse = ", "),
         call. = FALSE)
  }

  # Long by chemical: one row per input row and chemical, keeping only rows
  # that actually have a measurement.
  long_chemicals <- pivot_longer(
    data = nhanes_subset,
    cols = all_of(chems),
    names_to = "chemical_codename",
    values_to = "chem_measurement"
  ) %>%
    drop_na(chem_measurement)

  # Long by chemical and cell type. Missing cell measurements are kept.
  long_chemicals_cells <- pivot_longer(
    data = long_chemicals,
    cols = all_of(immune),
    names_to = "celltype_codename",
    values_to = "cell_measurement"
  )

  # log2-transform the chemical measurement, and overwrite SMOKING and URXUCR
  # with their log2 values.
  # NOTE(review): log2(0) is -Inf, which drop_na() does not remove; only
  # NA/NaN results (e.g. from negative measurements) are dropped here. Confirm
  # that zeros cannot occur upstream.
  long_nhanes_subset_dataset <- long_chemicals_cells %>%
    mutate(
      chem_log_measurement = log2(chem_measurement),
      SMOKING = log2(SMOKING),
      URXUCR = log2(URXUCR)
    ) %>%
    drop_na(chem_log_measurement) %>%
    dplyr::select(-chem_measurement) %>%
    # Reference levels: RIAGENDR code "1" and RIDRETH1 code "3".
    mutate(
      RIAGENDR = relevel(factor(RIAGENDR), ref = "1"),
      RIDRETH1 = relevel(factor(RIDRETH1), ref = "3")
    ) %>%
    ungroup() %>%
    # Converted after ungroup() so it also works if the input arrived grouped
    # by SDDSRVYR.
    mutate(SDDSRVYR = as.integer(SDDSRVYR))

  # str() prints on its own and returns NULL invisibly; wrapping it in print()
  # emitted a stray "NULL" line after the structure dump.
  str(long_nhanes_subset_dataset)

  long_nhanes_subset_dataset
}