diff --git a/testing_sophia/duration_scaling.R b/testing_sophia/duration_scaling.R index b6a0973..dd240e0 100644 --- a/testing_sophia/duration_scaling.R +++ b/testing_sophia/duration_scaling.R @@ -1,4 +1,8 @@ +library(tidyr) +library(tidyverse) library(dplyr) +library(mvMAPIT) +library(ggplot2) set.seed(1234) @@ -43,8 +47,8 @@ for(i in 1:nrow(pairs)) { skipProjection = TRUE ) - df <- rbind(df, mvmapit_normal$duration %>% mutate(n_samples = n_samples, n_snps = n_snps, projections = "False")) - df <- rbind(df, mvmapit_normal_projection$duration %>% mutate(n_samples = n_samples, n_snps = n_snps, projections = "True")) + df <- rbind(df, mvmapit_normal$duration %>% mutate(n_samples = n_samples, n_snp = n_snp, projection = "False")) + df <- rbind(df, mvmapit_normal_projection$duration %>% mutate(n_samples = n_samples, n_snp = n_snp, projection = "True")) #df <- rbind(df, c(mvmapit_normal$duration$duration_ms, n_samples = n_samples, #n_snps = n_snps, projections = "False")) @@ -59,5 +63,7 @@ for(i in 1:nrow(pairs)) { #df$total_duration <- rowSums(df[ , c(1, 2, 3, 4, 5, 6)], na.rm=TRUE) -result <- df %>% group_by(n_samples, n_snps, projections) %>% summarise(total = sum(duration_ms)) -view(result) +result <- df %>% group_by(n_sample, n_snp, projection) %>% summarise(total = sum(duration_ms)) + +saveRDS(result, "/oscar/data/lcrawfo1/sli347/duration_scaling.RDS") + diff --git a/testing_sophia/repeated_trials.R b/testing_sophia/repeated_trials.R index d61d813..e153719 100644 --- a/testing_sophia/repeated_trials.R +++ b/testing_sophia/repeated_trials.R @@ -11,21 +11,22 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) { df <- NULL df_statistics <- data.frame(trial = c(), correlation_p = c()) #add heretibility and hypo + genotype_data <- readRDS("/oscar/data/lcrawfo1/sli347/biobank_1_10000.rds") if (data_type == "null"){ n_pleiotropic = 0 n_trait_specific = 0 - print("null") } else { n_pleiotropic = 10 n_trait_specific = 10 - print("alternative") } for (x in 1:5) { + sample_ids <- sample(1:10000, 5000, replace = FALSE) + simulated_data <- simulate_traits( - genotype_data, + genotype_data[sample_ids, 1:5000], n_causal = 1000, n_trait_specific, n_pleiotropic, @@ -44,16 +45,18 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) { #run on smaller data set -- to test #might want to randomize columns + + mvmapit <- mvmapit( - t(simulated_data$genotype[1:100, 1:100]), - t(simulated_data$trait[1:100, ]), + t(simulated_data$genotype), + t(simulated_data$trait), test = "normal", #could add into method inputs the type of test we want to run - matcharg skipProjection = FALSE ) mvmapit_projection <- mvmapit( - t(simulated_data$genotype[1:100, 1:100]), - t(simulated_data$trait[1:100, ]), + t(simulated_data$genotype), + t(simulated_data$trait), test = "normal", skipProjection = TRUE ) @@ -74,7 +77,6 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) { di <- "exp" de <- FALSE # enabling the detrend option - # TODO: change causal snp facet labels gg <- df %>% ggplot(mapping = aes( sample = -log10(p) )) + @@ -89,22 +91,21 @@ mvmapit_simulation <- function(data_type = c("null", "alternative")) { labs(x = bquote("Theoretical Quantiles " -log[10](p)), y = bquote("Sample Quantiles " -log[10](p))) + facet_wrap(~projections) # check this one - show(gg) + ggsave("/oscar/data/lcrawfo1/sli347/repeated_qqplot.png", plot = gg, width = 6, height = 4, unit = "in", dpi = 300) + #evaluate distribution of p-value correlations colnames(df_statistics) <- c('trial', 'correlation_p') - box<- ggplot(df_statistics, aes(y = correlation_p)) + + box <- ggplot(df_statistics, aes(y = correlation_p)) + geom_boxplot() + scale_x_discrete() + labs(title = "Projection correlation distribution", y = "correlation coefficent") - show(box) - return(box) + ggsave("/oscar/data/lcrawfo1/sli347/repeated_boxplot.png", plot = box, width = 6, height = 4, unit = "in", dpi = 300) + } -box <- mvmapit_simulation("null") -show(box) - +mvmapit_simulation("normal") diff --git a/testing_sophia/slurmjob.sh b/testing_sophia/slurmjob.sh new file mode 100644 index 0000000..91aefb2 --- /dev/null +++ b/testing_sophia/slurmjob.sh @@ -0,0 +1,16 @@ +#!/bin/bash +#SBATCH --time=0-01:00 # Runtime in D-HH:MM +#SBATCH --job-name="duration" +#SBATCH --mem-per-cpu=10G +#SBATCH --cpus-per-task=2 +#SBATCH -n 1 +#SBATCH -p "batch" + + +NPROC=$((SLURM_JOB_CPUS_PER_NODE * SLURM_NNODES)) +echo "${NPROC} threads" +export OMP_NUM_THREADS=$NPROC + +module load r/4.3.1 + +Rscript --vanilla "duration_scaling.R" \ No newline at end of file diff --git a/testing_sophia/slurmjob_repeated.sh b/testing_sophia/slurmjob_repeated.sh new file mode 100644 index 0000000..3b48e23 --- /dev/null +++ b/testing_sophia/slurmjob_repeated.sh @@ -0,0 +1,16 @@ +#!/bin/bash +#SBATCH --time=0-01:00 # Runtime in D-HH:MM +#SBATCH --job-name="simulating trials" +#SBATCH --mem-per-cpu=10G +#SBATCH --cpus-per-task=2 +#SBATCH -n 1 +#SBATCH -p "batch" + + +NPROC=$((SLURM_JOB_CPUS_PER_NODE * SLURM_NNODES)) +echo "${NPROC} threads" +export OMP_NUM_THREADS=$NPROC + +module load r/4.3.1 + +Rscript --vanilla "repeated_trials.R" \ No newline at end of file