From 3146f7b9bfa148baa9a2bc1d9e24f516e700157a Mon Sep 17 00:00:00 2001 From: Sophia Li Date: Thu, 18 Apr 2024 11:00:19 -0400 Subject: [PATCH] updates --- R/MAPIT.R | 8 +- R/RcppExports.R | 4 +- src/MAPIT.cpp | 4 +- src/RcppExports.cpp | 8 +- testing_sophia/duration_scaling.R | 6 +- testing_sophia/repeated_trials.R | 103 ++++++++++++++--------- testing_sophia/slurmjob.sh | 2 +- testing_sophia/slurmjob_repeated.sh | 2 +- tests/testthat/test-toggle-projections.R | 2 + 9 files changed, 83 insertions(+), 56 deletions(-) diff --git a/R/MAPIT.R b/R/MAPIT.R index b60d684..304d593 100644 --- a/R/MAPIT.R +++ b/R/MAPIT.R @@ -122,7 +122,7 @@ mvmapit <- function( variance_components_ind <- get_variance_components_ind(Y) if (test == "hybrid") { log$info("Running normal C++ routine.") - vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, skipProjection = skipProjection, NULL) # Normal Z-Test + vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, SKIPPROJECTIONS = skipProjection, NULL) # Normal Z-Test pvals <- vc.mod$pvalues # row.names(pvals) <- rownames(X) pves <- vc.mod$PVE @@ -146,19 +146,19 @@ mvmapit <- function( ) log$info("Running davies C++ routine on selected SNPs.") - vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, skipProjection = skipProjection, NULL) + vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, SKIPPROJECTIONS = skipProjection, NULL) davies.pvals <- mvmapit_pvalues(vc.mod, X, accuracy) pvals[, variance_components_ind][ind_matrix] <- davies.pvals[, variance_components_ind][ind_matrix] } else if (test == "normal") { log$info("Running normal C++ routine.") - vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, skipProjection = skipProjection, NULL) + vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, SKIPPROJECTIONS = skipProjection, NULL) pvals <- vc.mod$pvalues pves <- vc.mod$PVE timings <- vc.mod$timings } else { ind <- ifelse(variantIndex, variantIndex, 1:nrow(X)) log$info("Running davies C++ routine.") - vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, skipProjection = skipProjection, NULL) + vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, SKIPPROJECTIONS = skipProjection, NULL) pvals <- mvmapit_pvalues(vc.mod, X, accuracy) pvals <- set_covariance_components(variance_components_ind, pvals) pves <- vc.mod$PVE diff --git a/R/RcppExports.R b/R/RcppExports.R index 9b6f3b1..ca8109a 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,7 +1,7 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, skipProjection = FALSE, GeneticSimilarityMatrix = NULL) { - .Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, skipProjection, GeneticSimilarityMatrix) +MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, SKIPPROJECTIONS = FALSE, GeneticSimilarityMatrix = NULL) { + .Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, SKIPPROJECTIONS, GeneticSimilarityMatrix) } diff --git a/src/MAPIT.cpp b/src/MAPIT.cpp index ea4c2fc..0d62af0 100644 --- a/src/MAPIT.cpp +++ b/src/MAPIT.cpp @@ -172,10 +172,9 @@ Rcpp::List MAPITCpp( b.cols(1, z) = Zz.t(); } b.col(z + 1) = arma::trans(x_k); - M = compute_projection_matrix(n, b); if(SKIPPROJECTIONS == false) { //need to fix name of skip projections variable - + M = compute_projection_matrix(n, b); K = project_matrix(K, b); G = project_matrix(G, b); @@ -187,6 +186,7 @@ Rcpp::List MAPITCpp( Yc = Y * M; //hereP - Yc called a lot in other lines } else { Yc = Y; + M = arma::eye(n, n); //assume identity -- indep } end = steady_clock::now(); diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index af6144a..006ae6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -12,8 +12,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // MAPITCpp -Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable Z, Rcpp::Nullable C, Rcpp::Nullable variantIndices, std::string testMethod, int cores, bool skipProjection, Rcpp::Nullable GeneticSimilarityMatrix); -RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP skipProjectionSEXP, SEXP GeneticSimilarityMatrixSEXP) { +Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable Z, Rcpp::Nullable C, Rcpp::Nullable variantIndices, std::string testMethod, int cores, bool SKIPPROJECTIONS, Rcpp::Nullable GeneticSimilarityMatrix); +RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP SKIPPROJECTIONSSEXP, SEXP GeneticSimilarityMatrixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -24,9 +24,9 @@ BEGIN_RCPP Rcpp::traits::input_parameter< Rcpp::Nullable >::type variantIndices(variantIndicesSEXP); Rcpp::traits::input_parameter< std::string >::type testMethod(testMethodSEXP); Rcpp::traits::input_parameter< int >::type cores(coresSEXP); - Rcpp::traits::input_parameter< bool >::type skipProjection(skipProjectionSEXP); + Rcpp::traits::input_parameter< bool >::type SKIPPROJECTIONS(SKIPPROJECTIONSSEXP); Rcpp::traits::input_parameter< Rcpp::Nullable >::type GeneticSimilarityMatrix(GeneticSimilarityMatrixSEXP); - rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, skipProjection, GeneticSimilarityMatrix)); + rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, SKIPPROJECTIONS, GeneticSimilarityMatrix)); return rcpp_result_gen; END_RCPP } diff --git a/testing_sophia/duration_scaling.R b/testing_sophia/duration_scaling.R index dd240e0..ceb896a 100644 --- a/testing_sophia/duration_scaling.R +++ b/testing_sophia/duration_scaling.R @@ -63,7 +63,7 @@ for(i in 1:nrow(pairs)) { #df$total_duration <- rowSums(df[ , c(1, 2, 3, 4, 5, 6)], na.rm=TRUE) -result <- df %>% group_by(n_sample, n_snp, projection) %>% summarise(total = sum(duration_ms)) - -saveRDS(result, "/oscar/data/lcrawfo1/sli347/duration_scaling.RDS") +result <- df %>% group_by(n_samples, n_snp, projection) %>% summarise(total = sum(duration_ms)) +saveRDS(df, "/oscar/data/lcrawfo1/sli347/duration_scaling_df2.RDS") +saveRDS(result, "/oscar/data/lcrawfo1/sli347/duration_scaling2.RDS") diff --git a/testing_sophia/repeated_trials.R b/testing_sophia/repeated_trials.R index e153719..875cc9e 100644 --- a/testing_sophia/repeated_trials.R +++ b/testing_sophia/repeated_trials.R @@ -3,13 +3,12 @@ library(tidyverse) library(dplyr) library(mvMAPIT) library(ggplot2) -library(qqplotr) # for alternative data -- need to toggle to find a way to run for null data or could write separate script mvmapit_simulation <- function(data_type = c("null", "alternative")) { - df <- NULL + total <- NULL df_statistics <- data.frame(trial = c(), correlation_p = c()) #add heretibility and hypo genotype_data <- readRDS("/oscar/data/lcrawfo1/sli347/biobank_1_10000.rds") @@ -22,18 +21,18 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) { } for (x in 1:5) { - - sample_ids <- sample(1:10000, 5000, replace = FALSE) + + sample_ids <- sample(1:10000, 5000, replace = FALSE) simulated_data <- simulate_traits( genotype_data[sample_ids, 1:5000], n_causal = 1000, n_trait_specific, - n_pleiotropic, - H2 = 0.6, #error = 1-H^2 -- could vary + n_pleiotropic, + H2 = 0.6, #error = 1-H^2 -- could vary d = 1, rho = 0.2, #decrease number means easier to find snp - marginal_correlation = 0.2, + marginal_correlation = 0.2, epistatic_correlation = 0.2, group_ratio_trait = 1, group_ratio_pleiotropic = 1, @@ -42,10 +41,10 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) { logLevel = "INFO", logFile = NULL ) - + #run on smaller data set -- to test #might want to randomize columns - + mvmapit <- mvmapit( t(simulated_data$genotype), @@ -53,59 +52,85 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) { test = "normal", #could add into method inputs the type of test we want to run - matcharg skipProjection = FALSE ) - + mvmapit_projection <- mvmapit( t(simulated_data$genotype), t(simulated_data$trait), - test = "normal", + test = "normal", skipProjection = TRUE ) - + #store data in tidyr where p-value lists are diff for each trial - df <- rbind(df, mvmapit$pvalues %>% mutate(trial = x, projections = "False")) - df <- rbind(df, mvmapit_projection$pvalues %>% mutate(trial = x, projections = "True")) - + total <- rbind(total, mvmapit$pvalues %>% mutate(trial = x, projection = "False")) + total <- rbind(total, mvmapit_projection$pvalues %>% mutate(trial = x, projection = "True")) + #print(cor(mvmapit$pvalues$p, mvmapit_projection$pvalues$p)) df_statistics <- rbind(df_statistics, c(trial = x, correlation_p = cor(mvmapit$pvalues$p, mvmapit_projection$pvalues$p))) } - - #compare p-values af entire - + #compare p-values af entire #expectation under null -- pvalue uniform distribution (compare between proj and not) - dp <- list(rate=log(10)) - di <- "exp" - de <- FALSE # enabling the detrend option - - gg <- df %>% ggplot(mapping = aes( - sample = -log10(p) - )) + + ntest <- total %>% + group_by(projection) %>% + summarize(n = n()) + + ci <- 0.95 + n_snps <- ntest$n[1] + clower = -log10(qbeta( + p = (1 - ci) / 2, + shape1 = seq(n_snps), + shape2 = rev(seq(n_snps)) + )) + cupper = -log10(qbeta( + p = (1 + ci) / 2, + shape1 = seq(n_snps), + shape2 = rev(seq(n_snps)) + )) + + df1 <- total %>% + group_by(projection) %>% + arrange(p, .by_group = TRUE) %>% + mutate( + cupper = cupper, + clower = clower, + expected= -log10(ppoints(n_snps)), + observed = -log10(p) + ) + + gg <- df1 %>% + ggplot(mapping= aes(x = expected, y = observed, color = projection)) + theme_bw() + - stat_qq_band(distribution = di, - dparams = dp, - detrend = de, - alpha = 0.5) + - stat_qq_line(distribution = di, dparams = dp, detrend = de) + - stat_qq_point(distribution = di, dparams = dp, detrend = de) + - theme(legend.position = "none") + + facet_wrap(~projection) + + geom_ribbon(aes(ymax = cupper, ymin = clower), + fill = "#999999", alpha = 0.5,linewidth =0.25 + ) + + geom_point() + + geom_segment( + data = . %>% filter(expected == max(expected)), + aes(x = 0, xend = expected, y = 0, yend = expected), + linewidth = 1, alpha = 1, lineend = "round" + ) + + geom_hline(yintercept=-log10(0.05 / ntest$n[1]), linetype='dashed', col = 'black') + labs(x = bquote("Theoretical Quantiles " -log[10](p)), - y = bquote("Sample Quantiles " -log[10](p))) + - facet_wrap(~projections) # check this one - ggsave("/oscar/data/lcrawfo1/sli347/repeated_qqplot.png", plot = gg, width = 6, height = 4, unit = "in", dpi = 300) + y = bquote("Sample Quantiles " -log[10](p))) + + theme(legend.position = "none") + + scale_color_manual(values = c("SteelBlue", "Indianred", "lightblue3", "#d79c9c", "darkgreen")) + + ggsave("/oscar/data/lcrawfo1/sli347/repeated_qqplot3.png", plot = gg, width = 6, height = 4, unit = "in", dpi = 300) + - #evaluate distribution of p-value correlations colnames(df_statistics) <- c('trial', 'correlation_p') - + box <- ggplot(df_statistics, aes(y = correlation_p)) + geom_boxplot() + scale_x_discrete() + labs(title = "Projection correlation distribution", y = "correlation coefficent") ggsave("/oscar/data/lcrawfo1/sli347/repeated_boxplot.png", plot = box, width = 6, height = 4, unit = "in", dpi = 300) - + } -mvmapit_simulation("normal") +mvmapit_simulation("null") diff --git a/testing_sophia/slurmjob.sh b/testing_sophia/slurmjob.sh index 91aefb2..5151bbf 100644 --- a/testing_sophia/slurmjob.sh +++ b/testing_sophia/slurmjob.sh @@ -1,5 +1,5 @@ #!/bin/bash -#SBATCH --time=0-01:00 # Runtime in D-HH:MM +#SBATCH --time=2-00:00 # Runtime in D-HH:MM #SBATCH --job-name="duration" #SBATCH --mem-per-cpu=10G #SBATCH --cpus-per-task=2 diff --git a/testing_sophia/slurmjob_repeated.sh b/testing_sophia/slurmjob_repeated.sh index 3b48e23..7bfae66 100644 --- a/testing_sophia/slurmjob_repeated.sh +++ b/testing_sophia/slurmjob_repeated.sh @@ -1,5 +1,5 @@ #!/bin/bash -#SBATCH --time=0-01:00 # Runtime in D-HH:MM +#SBATCH --time=2-00:00 # Runtime in D-HH:MM #SBATCH --job-name="simulating trials" #SBATCH --mem-per-cpu=10G #SBATCH --cpus-per-task=2 diff --git a/tests/testthat/test-toggle-projections.R b/tests/testthat/test-toggle-projections.R index 33acff0..41d3f0b 100644 --- a/tests/testthat/test-toggle-projections.R +++ b/tests/testthat/test-toggle-projections.R @@ -26,6 +26,8 @@ test_that( t(Y), test = "normal", cores = 1, logLevel = "DEBUG", skipProjection = TRUE ) + print(mapit$pvalues$p) + print( mapit_no_projections$pvalues$p) correlation <- cor(mapit$pvalues$p, mapit_no_projections$pvalues$p) print(correlation)