Skip to content

Commit

Permalink
MM-134 working examples
Browse files Browse the repository at this point in the history
  • Loading branch information
jdstamp committed Feb 26, 2024
1 parent bfbe934 commit 2d41a6d
Showing 1 changed file with 311 additions and 0 deletions.
311 changes: 311 additions & 0 deletions vignettes/functional_mvmapit.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
---
title: "functional_mvmapit"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{functional_mvmapit}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```

```{r setup}
library(mvMAPIT)
library(tidyr)
library(dplyr)
```



```{r}
# seed <- sample(1:1e6, 1)#67132
n_samples <- c(1000, 2000)#2938
n_snp <- 5747
d <- 1
PVE <- 0.6
rho <- c(0.5, 0.8)
n_causal <- 1000
n_trait_specific <- 0
n_pleiotropic <- 10
group_ratio_pleiotropic <- 1
epistatic_correlation <- 0.9
maf <- 0.05
n_rand_snps <- c(10,100) * (n_pleiotropic)
num_experiments <- 10
parameters <- crossing(n_samples, rho, n_rand_snps)
```

```{r simulations}
# set.seed(1234)
RESULT <- NULL
P <- NULL
for (k in 1:nrow(parameters)) {
n_samples <- parameters[[k, 1]]
rho <- parameters[[k, 2]]
n_rand_snps <- parameters[[k, 3]]
maf <- 0.05 + 0.45 * runif(n_snp)
random_minor_allele_counts <-
(runif(n_samples * n_snp) < maf) + (runif(n_samples * n_snp) < maf)
genotype_data <- matrix(
random_minor_allele_counts,
nrow = n_samples,
ncol = n_snp,
byrow = TRUE,
)
sample_names <- seq_len(n_samples) %>% sprintf(fmt = "id%04d")
snp_names <- seq_len(n_snp) %>% sprintf(fmt = "snp%04d")
colnames(genotype_data) <- snp_names
rownames(genotype_data) <- sample_names
#
for (l in seq_len(num_experiments)) {
seed <- sample(1:1e6, 1)
simulated_data <- simulate_traits(
genotype_data,
n_causal = n_causal,
n_trait_specific = n_trait_specific,
n_pleiotropic = n_pleiotropic,
d = d,
H2 = PVE,
rho = rho,
epistatic_correlation = epistatic_correlation,
group_ratio_pleiotropic = group_ratio_pleiotropic,
maf_threshold = 0.05,
seed = seed
)
epi <- which(grepl("epi", colnames(simulated_data$genotype)))
random_snps <- sample(1:n_snp, n_rand_snps)
mask_snps <- unique(c(epi, random_snps))
mvmapit_normal_full <- mvmapit(
t(simulated_data$genotype),
t(simulated_data$trait),
test = "normal",
variantIndex = epi
)
pv_full <- mvmapit_normal_full$pvalues %>%
drop_na() %>%
select(-trait) %>%
mutate(covar = "full",
n = n_samples,
rho = rho,
n_rand_snps = n_snp - length(epi) - 1,
trial = l)
W <- matrix(0, nrow = n_snp, ncol = n_snp)
for (i in mask_snps) {
for (j in mask_snps) {
W[i, j] = 1
}
}
mvmapit_normal_masked <- mvmapit(
t(simulated_data$genotype),
t(simulated_data$trait),
W = W,
test = "normal",
variantIndex = epi
)
pv_masked <- mvmapit_normal_masked$pvalues %>%
drop_na() %>%
select(-trait) %>%
mutate(covar = "masked",
n = n_samples,
rho = rho,
n_rand_snps = n_rand_snps,
trial = l)
p <- pv_masked %>%
rbind(pv_full)
P <- rbind(P, p)
}
}
```
```{r}
RESULT %>%
group_by(covar, alpha, n, rho, n_rand_snps) %>%
summarise(se = sd(power) / sqrt(num_experiments),
power = mean(power))
```
```{r}
RES05 <- P %>%
group_by(covar, n, rho, n_rand_snps, trial) %>%
summarise(
power = sum(p < 0.05) / n()
) %>%
mutate(alpha = 0.05)
RES01 <- P %>%
group_by(covar, n, rho, n_rand_snps, trial) %>%
summarise(
power = sum(p < 0.01) / n()
) %>%
mutate(alpha = 0.01)
RES001 <- P %>%
group_by(covar, n, rho, n_rand_snps, trial) %>%
summarise(
power = sum(p < 0.001) / n()
) %>%
mutate(alpha = 0.001)
RES <- RES05 %>% rbind(RES01) %>% rbind(RES001)
PLOT <- RES %>%
group_by(alpha, rho, n, n_rand_snps,covar) %>%
summarise(se = sd(power) / sqrt(num_experiments),
power = mean(power))
```

```{r}
library(ggplot2)
gg <- PLOT %>%
filter(n == 2000) %>%
mutate(n_rand_snps = case_when(
n_rand_snps == "NA" ~ "5737",
TRUE ~ n_rand_snps)) %>%
ggplot(aes(x=n_rand_snps, y=power, fill=covar)) +
geom_bar(stat="identity") +
geom_errorbar(aes(ymin=power-se, ymax=power+se), width=.2,
position=position_dodge(.9)) +
facet_grid(rho ~ alpha, labeller = label_both) +
theme_minimal() +
scale_fill_manual(values = c("Indianred", "SteelBlue")) +
labs(
x = "# Non-epistatic SNPs in covar",
y = "Power") +
theme(legend.position = "none")
show(gg)
ggsave(paste0("proof-of-concept", ".png"), plot = gg, width = 6, height = 4, dpi = 300)
```

```{r bhadur}
library(tidyr)
library(dplyr)
n_samples <- c(100, 1000, 2000) #2938
N <- max(n_samples)
n_snp <- 6000
d <- 1
PVE <- 0.6
rho <- 0.8
n_causal <- 50
n_trait_specific <- 0
n_pleiotropic <- 10
group_ratio_pleiotropic <- 1
epistatic_correlation <- 0.9
maf <- 0.05
n_rand_snps <- c(100, 1000, 5000)
num_experiments <- 5
parameters <- crossing(n_samples, n_rand_snps)
maf <- 0.05 + 0.45 * runif(n_snp)
random_minor_allele_counts <-
(runif(N * n_snp) < maf) + (runif(N * n_snp) < maf)
genotype_data <- matrix(
random_minor_allele_counts,
nrow = N,
ncol = n_snp,
byrow = TRUE,
)
sample_names <- seq_len(N) %>% sprintf(fmt = "id%04d")
snp_names <- seq_len(n_snp) %>% sprintf(fmt = "snp%04d")
colnames(genotype_data) <- snp_names
rownames(genotype_data) <- sample_names
seed <- sample(1:1e6, 1)
simulated_data <- simulate_traits(
genotype_data,
n_causal = n_causal,
n_trait_specific = n_trait_specific,
n_pleiotropic = n_pleiotropic,
d = d,
H2 = PVE,
rho = rho,
epistatic_correlation = epistatic_correlation,
group_ratio_pleiotropic = group_ratio_pleiotropic,
maf_threshold = 0.05,
seed = seed
)
```

```{r}
RESULT <- NULL
P2 <- NULL
for (k in 1:nrow(parameters)) {
n_samples <- parameters[[k, 1]]
n_rand_snps <- parameters[[k, 2]]
ind <- sample(1:N, n_samples, replace = FALSE)
#
print(n_samples)
print(n_rand_snps)
for (l in seq_len(num_experiments)) {
epi <- which(grepl("epi", colnames(simulated_data$genotype)))
random_snps <- sample(1:n_snp, n_rand_snps)
mask_snps <- unique(c(epi, random_snps))
W <- matrix(0, nrow = n_snp, ncol = n_snp)
for (i in mask_snps) {
for (j in mask_snps) {
W[i, j] = 1
}
}
mvmapit_normal_masked <- mvmapit(
t(simulated_data$genotype[ind,]),
t(simulated_data$trait[ind,]),
W = W,
test = "normal",
variantIndex = epi
)
p <- mvmapit_normal_masked$pvalues %>%
drop_na() %>%
select(-trait) %>%
mutate(n = n_samples,
rho = rho,
n_rand_snps = n_rand_snps,
trial = l)
P2 <- rbind(P2, p)
}
}
```
```{r}
library(ggplot2)
P2 %>%
select(n, p, n_rand_snps) %>%
mutate(rand = as.factor(n_rand_snps),
pplot = -log10(p)) %>%
group_by(n_rand_snps) %>%
ggplot(aes(x = n, y = p, color = rand) ) +
facet_wrap(rho) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)
```
```{r}
bhadur <- P2 %>%
group_by(id, n, rho, n_rand_snps) %>%
summarise(p = mean(p)) %>%
ungroup() %>%
nest(-id, -rho, -n_rand_snps) %>%
mutate(slope = map_dbl(data, ~coef(lm(p ~ n, data = .x))[["n"]])) %>%
unnest(data) %>%
group_by(rho, n_rand_snps) %>%
mutate(m = mean(slope),
signal_to_noise = as.factor(10/n_rand_snps)) %>%
ggplot(aes(x = n, y = p, color = signal_to_noise) ) +
# geom_point() +
geom_abline(aes(slope=m, intercept = 1, color = signal_to_noise), size=2) +
xlim(0, 10000) +
theme_minimal() +
scale_fill_manual(values = c("Indianred", "SteelBlue", "Aquamarine")) +
labs(
x = "Sample Size",
y = "Empirical p-value decrease")
show(bhadur)
ggsave(paste0("proof-of-concept-bhadur", ".png"), plot = bhadur, width = 6, height = 4, dpi = 300)
```

0 comments on commit 2d41a6d

Please sign in to comment.