Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Sophia Li committed Apr 18, 2024
1 parent f02a734 commit 3146f7b
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 56 deletions.
8 changes: 4 additions & 4 deletions R/MAPIT.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ mvmapit <- function(
variance_components_ind <- get_variance_components_ind(Y)
if (test == "hybrid") {
log$info("Running normal C++ routine.")
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, skipProjection = skipProjection, NULL) # Normal Z-Test
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, SKIPPROJECTIONS = skipProjection, NULL) # Normal Z-Test
pvals <- vc.mod$pvalues
# row.names(pvals) <- rownames(X)
pves <- vc.mod$PVE
Expand All @@ -146,19 +146,19 @@ mvmapit <- function(
)

log$info("Running davies C++ routine on selected SNPs.")
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, skipProjection = skipProjection, NULL)
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, SKIPPROJECTIONS = skipProjection, NULL)
davies.pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
pvals[, variance_components_ind][ind_matrix] <- davies.pvals[, variance_components_ind][ind_matrix]
} else if (test == "normal") {
log$info("Running normal C++ routine.")
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, skipProjection = skipProjection, NULL)
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, SKIPPROJECTIONS = skipProjection, NULL)
pvals <- vc.mod$pvalues
pves <- vc.mod$PVE
timings <- vc.mod$timings
} else {
ind <- ifelse(variantIndex, variantIndex, 1:nrow(X))
log$info("Running davies C++ routine.")
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, skipProjection = skipProjection, NULL)
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, SKIPPROJECTIONS = skipProjection, NULL)
pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
pvals <- set_covariance_components(variance_components_ind, pvals)
pves <- vc.mod$PVE
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, skipProjection = FALSE, GeneticSimilarityMatrix = NULL) {
.Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, skipProjection, GeneticSimilarityMatrix)
MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, SKIPPROJECTIONS = FALSE, GeneticSimilarityMatrix = NULL) {
.Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, SKIPPROJECTIONS, GeneticSimilarityMatrix)
}

4 changes: 2 additions & 2 deletions src/MAPIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,9 @@ Rcpp::List MAPITCpp(
b.cols(1, z) = Zz.t();
}
b.col(z + 1) = arma::trans(x_k);
M = compute_projection_matrix(n, b);

if(SKIPPROJECTIONS == false) { //need to fix name of skip projections variable

M = compute_projection_matrix(n, b);
K = project_matrix(K, b);
G = project_matrix(G, b);

Expand All @@ -187,6 +186,7 @@ Rcpp::List MAPITCpp(
Yc = Y * M; //hereP - Yc called a lot in other lines
} else {
Yc = Y;
M = arma::eye(n, n); //assume identity -- indep
}

end = steady_clock::now();
Expand Down
8 changes: 4 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// MAPITCpp
Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable<Rcpp::NumericMatrix> Z, Rcpp::Nullable<Rcpp::NumericMatrix> C, Rcpp::Nullable<Rcpp::NumericVector> variantIndices, std::string testMethod, int cores, bool skipProjection, Rcpp::Nullable<Rcpp::NumericMatrix> GeneticSimilarityMatrix);
RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP skipProjectionSEXP, SEXP GeneticSimilarityMatrixSEXP) {
Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable<Rcpp::NumericMatrix> Z, Rcpp::Nullable<Rcpp::NumericMatrix> C, Rcpp::Nullable<Rcpp::NumericVector> variantIndices, std::string testMethod, int cores, bool SKIPPROJECTIONS, Rcpp::Nullable<Rcpp::NumericMatrix> GeneticSimilarityMatrix);
RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP SKIPPROJECTIONSSEXP, SEXP GeneticSimilarityMatrixSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Expand All @@ -24,9 +24,9 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::NumericVector> >::type variantIndices(variantIndicesSEXP);
Rcpp::traits::input_parameter< std::string >::type testMethod(testMethodSEXP);
Rcpp::traits::input_parameter< int >::type cores(coresSEXP);
Rcpp::traits::input_parameter< bool >::type skipProjection(skipProjectionSEXP);
Rcpp::traits::input_parameter< bool >::type SKIPPROJECTIONS(SKIPPROJECTIONSSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::NumericMatrix> >::type GeneticSimilarityMatrix(GeneticSimilarityMatrixSEXP);
rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, skipProjection, GeneticSimilarityMatrix));
rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, SKIPPROJECTIONS, GeneticSimilarityMatrix));
return rcpp_result_gen;
END_RCPP
}
Expand Down
6 changes: 3 additions & 3 deletions testing_sophia/duration_scaling.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ for(i in 1:nrow(pairs)) {

#df$total_duration <- rowSums(df[ , c(1, 2, 3, 4, 5, 6)], na.rm=TRUE)

result <- df %>% group_by(n_sample, n_snp, projection) %>% summarise(total = sum(duration_ms))

saveRDS(result, "/oscar/data/lcrawfo1/sli347/duration_scaling.RDS")
result <- df %>% group_by(n_samples, n_snp, projection) %>% summarise(total = sum(duration_ms))

saveRDS(df, "/oscar/data/lcrawfo1/sli347/duration_scaling_df2.RDS")
saveRDS(result, "/oscar/data/lcrawfo1/sli347/duration_scaling2.RDS")
103 changes: 64 additions & 39 deletions testing_sophia/repeated_trials.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@ library(tidyverse)
library(dplyr)
library(mvMAPIT)
library(ggplot2)
library(qqplotr)

# for alternative data -- need to toggle to find a way to run for null data or could write separate script

mvmapit_simulation <- function(data_type = c("null", "alternative")) {

df <- NULL
total <- NULL
df_statistics <- data.frame(trial = c(), correlation_p = c()) #add heretibility and hypo
genotype_data <- readRDS("/oscar/data/lcrawfo1/sli347/biobank_1_10000.rds")

Expand All @@ -22,18 +21,18 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) {
}

for (x in 1:5) {
sample_ids <- sample(1:10000, 5000, replace = FALSE)

sample_ids <- sample(1:10000, 5000, replace = FALSE)

simulated_data <- simulate_traits(
genotype_data[sample_ids, 1:5000],
n_causal = 1000,
n_trait_specific,
n_pleiotropic,
H2 = 0.6, #error = 1-H^2 -- could vary
n_pleiotropic,
H2 = 0.6, #error = 1-H^2 -- could vary
d = 1,
rho = 0.2, #decrease number means easier to find snp
marginal_correlation = 0.2,
marginal_correlation = 0.2,
epistatic_correlation = 0.2,
group_ratio_trait = 1,
group_ratio_pleiotropic = 1,
Expand All @@ -42,70 +41,96 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) {
logLevel = "INFO",
logFile = NULL
)

#run on smaller data set -- to test
#might want to randomize columns


mvmapit <- mvmapit(
t(simulated_data$genotype),
t(simulated_data$trait),
test = "normal", #could add into method inputs the type of test we want to run - matcharg
skipProjection = FALSE
)

mvmapit_projection <- mvmapit(
t(simulated_data$genotype),
t(simulated_data$trait),
test = "normal",
test = "normal",
skipProjection = TRUE
)

#store data in tidyr where p-value lists are diff for each trial
df <- rbind(df, mvmapit$pvalues %>% mutate(trial = x, projections = "False"))
df <- rbind(df, mvmapit_projection$pvalues %>% mutate(trial = x, projections = "True"))
total <- rbind(total, mvmapit$pvalues %>% mutate(trial = x, projection = "False"))
total <- rbind(total, mvmapit_projection$pvalues %>% mutate(trial = x, projection = "True"))

#print(cor(mvmapit$pvalues$p, mvmapit_projection$pvalues$p))
df_statistics <- rbind(df_statistics, c(trial = x, correlation_p = cor(mvmapit$pvalues$p, mvmapit_projection$pvalues$p)))
}

#compare p-values af entire


#compare p-values af entire
#expectation under null -- pvalue uniform distribution (compare between proj and not)
dp <- list(rate=log(10))
di <- "exp"
de <- FALSE # enabling the detrend option

gg <- df %>% ggplot(mapping = aes(
sample = -log10(p)
)) +
ntest <- total %>%
group_by(projection) %>%
summarize(n = n())

ci <- 0.95
n_snps <- ntest$n[1]
clower = -log10(qbeta(
p = (1 - ci) / 2,
shape1 = seq(n_snps),
shape2 = rev(seq(n_snps))
))
cupper = -log10(qbeta(
p = (1 + ci) / 2,
shape1 = seq(n_snps),
shape2 = rev(seq(n_snps))
))

df1 <- total %>%
group_by(projection) %>%
arrange(p, .by_group = TRUE) %>%
mutate(
cupper = cupper,
clower = clower,
expected= -log10(ppoints(n_snps)),
observed = -log10(p)
)

gg <- df1 %>%
ggplot(mapping= aes(x = expected, y = observed, color = projection)) +
theme_bw() +
stat_qq_band(distribution = di,
dparams = dp,
detrend = de,
alpha = 0.5) +
stat_qq_line(distribution = di, dparams = dp, detrend = de) +
stat_qq_point(distribution = di, dparams = dp, detrend = de) +
theme(legend.position = "none") +
facet_wrap(~projection) +
geom_ribbon(aes(ymax = cupper, ymin = clower),
fill = "#999999", alpha = 0.5,linewidth =0.25
) +
geom_point() +
geom_segment(
data = . %>% filter(expected == max(expected)),
aes(x = 0, xend = expected, y = 0, yend = expected),
linewidth = 1, alpha = 1, lineend = "round"
) +
geom_hline(yintercept=-log10(0.05 / ntest$n[1]), linetype='dashed', col = 'black') +
labs(x = bquote("Theoretical Quantiles " -log[10](p)),
y = bquote("Sample Quantiles " -log[10](p))) +
facet_wrap(~projections) # check this one
ggsave("/oscar/data/lcrawfo1/sli347/repeated_qqplot.png", plot = gg, width = 6, height = 4, unit = "in", dpi = 300)
y = bquote("Sample Quantiles " -log[10](p))) +
theme(legend.position = "none") +
scale_color_manual(values = c("SteelBlue", "Indianred", "lightblue3", "#d79c9c", "darkgreen"))

ggsave("/oscar/data/lcrawfo1/sli347/repeated_qqplot3.png", plot = gg, width = 6, height = 4, unit = "in", dpi = 300)



#evaluate distribution of p-value correlations
colnames(df_statistics) <- c('trial', 'correlation_p')

box <- ggplot(df_statistics, aes(y = correlation_p)) +
geom_boxplot() + scale_x_discrete() +
labs(title = "Projection correlation distribution",
y = "correlation coefficent")
ggsave("/oscar/data/lcrawfo1/sli347/repeated_boxplot.png", plot = box, width = 6, height = 4, unit = "in", dpi = 300)

}

mvmapit_simulation("normal")
mvmapit_simulation("null")



2 changes: 1 addition & 1 deletion testing_sophia/slurmjob.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
#SBATCH --time=0-01:00 # Runtime in D-HH:MM
#SBATCH --time=2-00:00 # Runtime in D-HH:MM
#SBATCH --job-name="duration"
#SBATCH --mem-per-cpu=10G
#SBATCH --cpus-per-task=2
Expand Down
2 changes: 1 addition & 1 deletion testing_sophia/slurmjob_repeated.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
#SBATCH --time=0-01:00 # Runtime in D-HH:MM
#SBATCH --time=2-00:00 # Runtime in D-HH:MM
#SBATCH --job-name="simulating trials"
#SBATCH --mem-per-cpu=10G
#SBATCH --cpus-per-task=2
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-toggle-projections.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ test_that(
t(Y),
test = "normal", cores = 1, logLevel = "DEBUG", skipProjection = TRUE
)
print(mapit$pvalues$p)
print( mapit_no_projections$pvalues$p)
correlation <- cor(mapit$pvalues$p, mapit_no_projections$pvalues$p)
print(correlation)

Expand Down

0 comments on commit 3146f7b

Please sign in to comment.