From 0d22c8ccae686fb851c04a043856b19612a1ba27 Mon Sep 17 00:00:00 2001 From: Raphael Bednarsky <72978444+bednarsky@users.noreply.github.com> Date: Wed, 8 Jan 2025 10:52:41 +0100 Subject: [PATCH] Add model matrix as output in rule to allow downstream rules to make use of it e.g., contrasts (#30) --- workflow/rules/dea.smk | 1 + workflow/scripts/limma.R | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/rules/dea.smk b/workflow/rules/dea.smk index 9d232a6..ebcbe82 100644 --- a/workflow/rules/dea.smk +++ b/workflow/rules/dea.smk @@ -6,6 +6,7 @@ rule dea: output: dea_results = os.path.join(result_path,'{analysis}','results.csv'), lmfit_object = os.path.join(result_path,'{analysis}','lmfit_object.rds'), + model_matrix = os.path.join(result_path,'{analysis}','model_matrix.csv'), resources: mem_mb=config.get("mem", "16000"), threads: config.get("threads", 1) diff --git a/workflow/scripts/limma.R b/workflow/scripts/limma.R index 63b472f..5d486ae 100644 --- a/workflow/scripts/limma.R +++ b/workflow/scripts/limma.R @@ -11,6 +11,7 @@ metadata_path <- snakemake@input[[2]] # outputs dea_result_path <- snakemake@output[["dea_results"]] lmfit_object_path <- snakemake@output[["lmfit_object"]] +model_matrix_path <- snakemake@output[["model_matrix"]] # parameters feature_annotation <- snakemake@params[["feature_annotation"]] @@ -94,7 +95,7 @@ for(col in names(reference_levels)){ model_matrix <- model.matrix(design, metadata) # save model matrix # write.csv(model_matrix, file=file.path(result_dir,"model_matrix.csv")) -fwrite(as.data.frame(model_matrix), file=file.path(result_dir,"model_matrix.csv"), row.names=TRUE) +fwrite(as.data.frame(model_matrix), file=file.path(model_matrix_path), row.names=TRUE) # check if the model represented by the design matrix has full rank ie linearly independent columns, which is required to make the model identifiable! # new: more efficient and computationally stable compared to the svd() function, especially for large matrices, and it does not require rounding the singular values or checking for non-zero values.