forked from Al-Murphy/MungeSumstats
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_effect_columns_nonzero.R
66 lines (66 loc) · 2.73 KB
/
check_effect_columns_nonzero.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#' Ensure that no SNP has an effect column value equal to 0
#'
#' Looks for the effect columns `BETA`, `OR`, `LOG_ODDS` and `SIGNED_SUMSTAT`
#' in `sumstats_dt`. Those that are present are coerced to numeric and every
#' row in which at least one of them equals 0 is removed. Missing (`NA`)
#' effect values are not counted as 0, so on their own they never cause a row
#' to be removed by this check.
#'
#' @inheritParams format_sumstats
#' @param check_save_out list whose `log_folder`, `extension` and `sep`
#' elements are used to build the log file path and to write the removed rows
#' @param log_files list of log file locations
#' @return list containing sumstats_dt, the modified summary statistics data
#' table object and the log file list
#' @keywords internal
check_effect_columns_nonzero <- function(sumstats_dt, path,
                                         effect_columns_nonzero,
                                         log_folder_ind, check_save_out,
                                         tabix_index, nThread, log_files) {
    # Local binding for the data.table `.SD` symbol used in the calls below
    .SD <- NULL
    col_headers <- names(sumstats_dt)
    effect_columns <- c("BETA", "OR", "LOG_ODDS", "SIGNED_SUMSTAT")
    # filter to effect columns in the data
    effect_columns_dat <- effect_columns[effect_columns %in% col_headers]
    if (length(effect_columns_dat) >= 1 && effect_columns_nonzero) {
        message("Filtering effect columns, ensuring none equal 0.")
        # ensure numeric (updates the columns by reference)
        sumstats_dt[, (effect_columns_dat) := lapply(.SD, as.numeric),
            .SDcols = effect_columns_dat
        ]
        # check if any equal 0 - use data table for speed.
        # The mask must be NA-free: a plain `== 0` returns NA for missing
        # values, which would make the count below NA and break the `if`.
        bad_ids <- sumstats_dt[
            , Reduce(`|`, lapply(.SD, function(x) !is.na(x) & x == 0)),
            .SDcols = effect_columns_dat
        ]
        num_bad_ids <- sum(bad_ids)
        if (num_bad_ids > 0) {
            msg <- paste0(
                formatC(num_bad_ids, big.mark = ","), " SNPs",
                " have effect values = 0 and will be removed"
            )
            message(msg)
            # If user wants log, save it to there
            if (log_folder_ind) {
                name <- get_unique_name_log_file(
                    name = "effect_col_zero",
                    log_files = log_files
                )
                # Built once; used both for writing and for the log record
                log_path <- paste0(
                    check_save_out$log_folder,
                    "/", name,
                    check_save_out$extension
                )
                write_sumstats(
                    sumstats_dt = sumstats_dt[bad_ids, ],
                    save_path = log_path,
                    sep = check_save_out$sep,
                    # don't tabix index the log file as it could contain
                    # missing values, which would cause an error
                    # tabix_index = tabix_index,
                    nThread = nThread
                )
                log_files[[name]] <- log_path
            }
            sumstats_dt <- sumstats_dt[!bad_ids]
        }
    }
    list("sumstats_dt" = sumstats_dt, "log_files" = log_files)
}