-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
311 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,311 @@ | ||
--- | ||
title: "functional_mvmapit" | ||
output: rmarkdown::html_vignette | ||
vignette: > | ||
%\VignetteIndexEntry{functional_mvmapit} | ||
%\VignetteEngine{knitr::rmarkdown} | ||
%\VignetteEncoding{UTF-8} | ||
--- | ||
|
||
```{r, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>" | ||
) | ||
``` | ||
|
||
```{r setup} | ||
library(mvMAPIT) | ||
library(tidyr) | ||
library(dplyr) | ||
``` | ||
|
||
|
||
|
||
```{r} | ||
# seed <- sample(1:1e6, 1)#67132 | ||
n_samples <- c(1000, 2000)#2938 | ||
n_snp <- 5747 | ||
d <- 1 | ||
PVE <- 0.6 | ||
rho <- c(0.5, 0.8) | ||
n_causal <- 1000 | ||
n_trait_specific <- 0 | ||
n_pleiotropic <- 10 | ||
group_ratio_pleiotropic <- 1 | ||
epistatic_correlation <- 0.9 | ||
maf <- 0.05 | ||
n_rand_snps <- c(10,100) * (n_pleiotropic) | ||
num_experiments <- 10 | ||
parameters <- crossing(n_samples, rho, n_rand_snps) | ||
``` | ||
|
||
```{r simulations} | ||
# set.seed(1234) | ||
RESULT <- NULL | ||
P <- NULL | ||
for (k in 1:nrow(parameters)) { | ||
n_samples <- parameters[[k, 1]] | ||
rho <- parameters[[k, 2]] | ||
n_rand_snps <- parameters[[k, 3]] | ||
maf <- 0.05 + 0.45 * runif(n_snp) | ||
random_minor_allele_counts <- | ||
(runif(n_samples * n_snp) < maf) + (runif(n_samples * n_snp) < maf) | ||
genotype_data <- matrix( | ||
random_minor_allele_counts, | ||
nrow = n_samples, | ||
ncol = n_snp, | ||
byrow = TRUE, | ||
) | ||
sample_names <- seq_len(n_samples) %>% sprintf(fmt = "id%04d") | ||
snp_names <- seq_len(n_snp) %>% sprintf(fmt = "snp%04d") | ||
colnames(genotype_data) <- snp_names | ||
rownames(genotype_data) <- sample_names | ||
# | ||
for (l in seq_len(num_experiments)) { | ||
seed <- sample(1:1e6, 1) | ||
simulated_data <- simulate_traits( | ||
genotype_data, | ||
n_causal = n_causal, | ||
n_trait_specific = n_trait_specific, | ||
n_pleiotropic = n_pleiotropic, | ||
d = d, | ||
H2 = PVE, | ||
rho = rho, | ||
epistatic_correlation = epistatic_correlation, | ||
group_ratio_pleiotropic = group_ratio_pleiotropic, | ||
maf_threshold = 0.05, | ||
seed = seed | ||
) | ||
epi <- which(grepl("epi", colnames(simulated_data$genotype))) | ||
random_snps <- sample(1:n_snp, n_rand_snps) | ||
mask_snps <- unique(c(epi, random_snps)) | ||
mvmapit_normal_full <- mvmapit( | ||
t(simulated_data$genotype), | ||
t(simulated_data$trait), | ||
test = "normal", | ||
variantIndex = epi | ||
) | ||
pv_full <- mvmapit_normal_full$pvalues %>% | ||
drop_na() %>% | ||
select(-trait) %>% | ||
mutate(covar = "full", | ||
n = n_samples, | ||
rho = rho, | ||
n_rand_snps = n_snp - length(epi) - 1, | ||
trial = l) | ||
W <- matrix(0, nrow = n_snp, ncol = n_snp) | ||
for (i in mask_snps) { | ||
for (j in mask_snps) { | ||
W[i, j] = 1 | ||
} | ||
} | ||
mvmapit_normal_masked <- mvmapit( | ||
t(simulated_data$genotype), | ||
t(simulated_data$trait), | ||
W = W, | ||
test = "normal", | ||
variantIndex = epi | ||
) | ||
pv_masked <- mvmapit_normal_masked$pvalues %>% | ||
drop_na() %>% | ||
select(-trait) %>% | ||
mutate(covar = "masked", | ||
n = n_samples, | ||
rho = rho, | ||
n_rand_snps = n_rand_snps, | ||
trial = l) | ||
p <- pv_masked %>% | ||
rbind(pv_full) | ||
P <- rbind(P, p) | ||
} | ||
} | ||
``` | ||
```{r} | ||
RESULT %>% | ||
group_by(covar, alpha, n, rho, n_rand_snps) %>% | ||
summarise(se = sd(power) / sqrt(num_experiments), | ||
power = mean(power)) | ||
``` | ||
```{r} | ||
RES05 <- P %>% | ||
group_by(covar, n, rho, n_rand_snps, trial) %>% | ||
summarise( | ||
power = sum(p < 0.05) / n() | ||
) %>% | ||
mutate(alpha = 0.05) | ||
RES01 <- P %>% | ||
group_by(covar, n, rho, n_rand_snps, trial) %>% | ||
summarise( | ||
power = sum(p < 0.01) / n() | ||
) %>% | ||
mutate(alpha = 0.01) | ||
RES001 <- P %>% | ||
group_by(covar, n, rho, n_rand_snps, trial) %>% | ||
summarise( | ||
power = sum(p < 0.001) / n() | ||
) %>% | ||
mutate(alpha = 0.001) | ||
RES <- RES05 %>% rbind(RES01) %>% rbind(RES001) | ||
PLOT <- RES %>% | ||
group_by(alpha, rho, n, n_rand_snps,covar) %>% | ||
summarise(se = sd(power) / sqrt(num_experiments), | ||
power = mean(power)) | ||
``` | ||
|
||
```{r} | ||
library(ggplot2) | ||
gg <- PLOT %>% | ||
filter(n == 2000) %>% | ||
mutate(n_rand_snps = case_when( | ||
n_rand_snps == "NA" ~ "5737", | ||
TRUE ~ n_rand_snps)) %>% | ||
ggplot(aes(x=n_rand_snps, y=power, fill=covar)) + | ||
geom_bar(stat="identity") + | ||
geom_errorbar(aes(ymin=power-se, ymax=power+se), width=.2, | ||
position=position_dodge(.9)) + | ||
facet_grid(rho ~ alpha, labeller = label_both) + | ||
theme_minimal() + | ||
scale_fill_manual(values = c("Indianred", "SteelBlue")) + | ||
labs( | ||
x = "# Non-epistatic SNPs in covar", | ||
y = "Power") + | ||
theme(legend.position = "none") | ||
show(gg) | ||
ggsave(paste0("proof-of-concept", ".png"), plot = gg, width = 6, height = 4, dpi = 300) | ||
``` | ||
|
||
```{r bhadur} | ||
library(tidyr) | ||
library(dplyr) | ||
n_samples <- c(100, 1000, 2000) #2938 | ||
N <- max(n_samples) | ||
n_snp <- 6000 | ||
d <- 1 | ||
PVE <- 0.6 | ||
rho <- 0.8 | ||
n_causal <- 50 | ||
n_trait_specific <- 0 | ||
n_pleiotropic <- 10 | ||
group_ratio_pleiotropic <- 1 | ||
epistatic_correlation <- 0.9 | ||
maf <- 0.05 | ||
n_rand_snps <- c(100, 1000, 5000) | ||
num_experiments <- 5 | ||
parameters <- crossing(n_samples, n_rand_snps) | ||
maf <- 0.05 + 0.45 * runif(n_snp) | ||
random_minor_allele_counts <- | ||
(runif(N * n_snp) < maf) + (runif(N * n_snp) < maf) | ||
genotype_data <- matrix( | ||
random_minor_allele_counts, | ||
nrow = N, | ||
ncol = n_snp, | ||
byrow = TRUE, | ||
) | ||
sample_names <- seq_len(N) %>% sprintf(fmt = "id%04d") | ||
snp_names <- seq_len(n_snp) %>% sprintf(fmt = "snp%04d") | ||
colnames(genotype_data) <- snp_names | ||
rownames(genotype_data) <- sample_names | ||
seed <- sample(1:1e6, 1) | ||
simulated_data <- simulate_traits( | ||
genotype_data, | ||
n_causal = n_causal, | ||
n_trait_specific = n_trait_specific, | ||
n_pleiotropic = n_pleiotropic, | ||
d = d, | ||
H2 = PVE, | ||
rho = rho, | ||
epistatic_correlation = epistatic_correlation, | ||
group_ratio_pleiotropic = group_ratio_pleiotropic, | ||
maf_threshold = 0.05, | ||
seed = seed | ||
) | ||
``` | ||
|
||
```{r} | ||
RESULT <- NULL | ||
P2 <- NULL | ||
for (k in 1:nrow(parameters)) { | ||
n_samples <- parameters[[k, 1]] | ||
n_rand_snps <- parameters[[k, 2]] | ||
ind <- sample(1:N, n_samples, replace = FALSE) | ||
# | ||
print(n_samples) | ||
print(n_rand_snps) | ||
for (l in seq_len(num_experiments)) { | ||
epi <- which(grepl("epi", colnames(simulated_data$genotype))) | ||
random_snps <- sample(1:n_snp, n_rand_snps) | ||
mask_snps <- unique(c(epi, random_snps)) | ||
W <- matrix(0, nrow = n_snp, ncol = n_snp) | ||
for (i in mask_snps) { | ||
for (j in mask_snps) { | ||
W[i, j] = 1 | ||
} | ||
} | ||
mvmapit_normal_masked <- mvmapit( | ||
t(simulated_data$genotype[ind,]), | ||
t(simulated_data$trait[ind,]), | ||
W = W, | ||
test = "normal", | ||
variantIndex = epi | ||
) | ||
p <- mvmapit_normal_masked$pvalues %>% | ||
drop_na() %>% | ||
select(-trait) %>% | ||
mutate(n = n_samples, | ||
rho = rho, | ||
n_rand_snps = n_rand_snps, | ||
trial = l) | ||
P2 <- rbind(P2, p) | ||
} | ||
} | ||
``` | ||
```{r} | ||
library(ggplot2) | ||
P2 %>% | ||
select(n, p, n_rand_snps) %>% | ||
mutate(rand = as.factor(n_rand_snps), | ||
pplot = -log10(p)) %>% | ||
group_by(n_rand_snps) %>% | ||
ggplot(aes(x = n, y = p, color = rand) ) + | ||
facet_wrap(rho) + | ||
geom_point() + | ||
geom_smooth(method = "lm", se = FALSE) | ||
``` | ||
```{r} | ||
bhadur <- P2 %>% | ||
group_by(id, n, rho, n_rand_snps) %>% | ||
summarise(p = mean(p)) %>% | ||
ungroup() %>% | ||
nest(-id, -rho, -n_rand_snps) %>% | ||
mutate(slope = map_dbl(data, ~coef(lm(p ~ n, data = .x))[["n"]])) %>% | ||
unnest(data) %>% | ||
group_by(rho, n_rand_snps) %>% | ||
mutate(m = mean(slope), | ||
signal_to_noise = as.factor(10/n_rand_snps)) %>% | ||
ggplot(aes(x = n, y = p, color = signal_to_noise) ) + | ||
# geom_point() + | ||
geom_abline(aes(slope=m, intercept = 1, color = signal_to_noise), size=2) + | ||
xlim(0, 10000) + | ||
theme_minimal() + | ||
scale_fill_manual(values = c("Indianred", "SteelBlue", "Aquamarine")) + | ||
labs( | ||
x = "Sample Size", | ||
y = "Empirical p-value decrease") | ||
show(bhadur) | ||
ggsave(paste0("proof-of-concept-bhadur", ".png"), plot = bhadur, width = 6, height = 4, dpi = 300) | ||
``` | ||
|