-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathrunCCA.R
741 lines (715 loc) · 27.2 KB
/
runCCA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
#' Canonical Correspondence Analysis and Redundancy Analysis
#'
#' These functions perform Canonical Correspondence Analysis on data stored
#' in a \code{SummarizedExperiment}.
#'
#' @inheritParams getDominant
#' @inheritParams getDissimilarity
#'
#' @param formula \code{formula}. If \code{x} is a
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#' a formula can be supplied. Based on the right-hand side of the given formula
#' \code{colData} is subset to \code{col.var}.
#'
#' \code{col.var} and \code{formula} can be missing, which turns the CCA
#' analysis into a CA analysis and dbRDA into PCoA/MDS.
#'
#' @param data \code{data.frame} or coarcible to one. The covariance table
#' including covariates defined by \code{formula}.
#'
#' @param col.var \code{Character scalar}. When \code{x} is a
#' \code{SummarizedExperiment},\code{col.var} can be used to specify variables
#' from \code{colData}.
#'
#' @param variables Deprecated. Use \code{col.var} instead.
#'
#' @param test.signif \code{Logical scalar}. Should the PERMANOVA and analysis
#' of multivariate homogeneity of group dispersions be performed.
#' (Default: \code{TRUE})
#'
#' @param altexp \code{Character scalar} or \code{integer scalar}. Specifies an
#' alternative experiment containing the input data.
#'
#' @param name \code{Character scalar}. A name for the \code{reducedDim()}
#' where results will be stored. (Default: \code{"CCA"})
#'
#' @param exprs_values Deprecated. Use \code{assay.type} instead.
#'
#' @param ... additional arguments passed to vegan::cca or vegan::dbrda and
#' other internal functions.
#' \itemize{
#' \item \code{method} a dissimilarity measure to be applied in dbRDA and
#' possible following homogeneity test. (Default: \code{"euclidean"})
#'
#' \item \code{scale}: \code{Logical scalar}. Should the expression values be
#' standardized? \code{scale} is disabled when using \code{*RDA} functions.
#' Please scale before performing RDA. (Default: \code{TRUE})
#'
#' \item \code{na.action}: \code{function}. Action to take when missing
#' values for any of the variables in \code{formula} are encountered.
#' (Default: \code{na.fail})
#'
#' \item \code{full} \code{Logical scalar}. Should all the results from the
#' significance calculations be returned. When \code{FALSE}, only
#' summary tables are returned. (Default: \code{FALSE})
#'
#' \item \code{homogeneity.test}: \code{Character scalar}. Specifies
#' the significance test used to analyse
#' \code{\link[vegan:betadisper]{vegan::betadisper}} results.
#' Options include 'permanova'
#' (\code{\link[vegan:permutest]{vegan::permutest}}), 'anova'
#' (\code{\link[stats:anova]{stats::anova}}) and 'tukeyhsd'
#' (\code{\link[stats:TukeyHSD]{stats::TukeyHSD}}).
#' (Default: \code{"permanova"})
#'
#' \item \code{permutations} a numeric value specifying the number of
#' permutations for significance testing in \code{vegan::anova.cca}.
#' (Default: \code{999})
#' }
#'
#' @details
#' *CCA functions utilize \code{vegan:cca} and *RDA functions
#' \code{vegan:dbRDA}. By default, dbRDA is done with euclidean distances, which
#' is equivalent to RDA. \code{col.var} and \code{formula} can be missing,
#' which turns the CCA analysis into a CA analysis and dbRDA into PCoA/MDS.
#'
#' Significance tests are done with \code{vegan:anova.cca} (PERMANOVA). Group
#' dispersion, i.e., homogeneity within groups is analyzed with
#' \code{\link[vegan:betadisper]{vegan::betadisper}}
#' (multivariate homogeneity of groups dispersions
#' (variances)) and statistical significance of homogeneity is tested with a
#' test specified by \code{homogeneity.test} parameter.
#'
#' @return
#' For \code{getCCA} a matrix with samples as rows and CCA dimensions as
#' columns. Attributes include output from \code{\link[vegan:scores]{scores}},
#' eigenvalues, the \code{cca}/\code{rda} object and significance analysis
#' results.
#'
#' For \code{addCCA} a modified \code{x} with the results stored in
#' \code{reducedDim} as the given \code{name}.
#'
#' @name runCCA
#' @seealso
#' For more details on the actual implementation see
#' \code{\link[vegan:cca]{cca}} and \code{\link[vegan:dbrda]{dbrda}}
#'
#' @examples
#' library(miaViz)
#' data("enterotype", package = "mia")
#' tse <- enterotype
#'
#' # Perform CCA and exclude any sample with missing ClinicalStatus
#' tse <- addCCA(
#' tse,
#' formula = data ~ ClinicalStatus,
#' na.action = na.exclude
#' )
#'
#' # Plot CCA
#' plotCCA(tse, "CCA", colour_by = "ClinicalStatus")
#'
#' # Fetch significance results
#' attr(reducedDim(tse, "CCA"), "significance")
#'
#' tse <- transformAssay(tse, method = "relabundance")
#'
#' # Specify dissimilarity measure
#' tse <- addRDA(
#' tse,
#' formula = data ~ ClinicalStatus,
#' assay.type = "relabundance",
#' method = "bray",
#' name = "RDA_bray",
#' na.action = na.exclude
#' )
#'
#' # To scale values when using *RDA functions, use
#' # transformAssay(MARGIN = "features", ...)
#' tse <- transformAssay(tse, method = "standardize", MARGIN = "features")
#'
#' # Data might include taxa that do not vary. Remove those because after
#' # z-transform their value is NA
#' tse <- tse[rowSums(is.na(assay(tse, "standardize"))) == 0, ]
#'
#' # Calculate RDA
#' tse <- addRDA(
#' tse,
#' formula = data ~ ClinicalStatus,
#' assay.type = "standardize",
#' name = "rda_scaled",
#' na.action = na.omit
#' )
#'
#' # Plot RDA
#' plotRDA(tse, "rda_scaled", colour_by = "ClinicalStatus")
#'
#' # A common choice along with PERMANOVA is ANOVA when statistical significance
#' # of homogeneity of groups is analysed. Moreover, full significance test
#' # results can be returned.
#' tse <- addRDA(
#' tse,
#' formula = data ~ ClinicalStatus,
#' homogeneity.test = "anova",
#' full = TRUE
#' )
#'
#' # Example showing how to pass extra parameters, such as 'permutations',
#' # to anova.cca
#' tse <- addRDA(
#' tse,
#' formula = data ~ ClinicalStatus,
#' permutations = 500
#' )
#'
NULL
############################# Exported CCA methods #############################
#' @export
#' @rdname runCCA
#' @aliases getCCA
calculateCCA <- function(x, ...){
getCCA(x, ...)
}
#' @export
#' @rdname runCCA
#' @aliases addCCA
runCCA <- function(x, ...){
addCCA(x, ...)
}
#' @export
#' @rdname runCCA
setMethod("getCCA", "ANY", function(x, formula, data, ...){
if( !is.matrix(x) ){
stop("'x' must be matrix.", call. = FALSE)
}
if( !is(formula, "formula") ){
stop("'formula' must be formula or NULL.", call. = FALSE)
}
if( !(is.data.frame(data) || is.matrix(data) || is(data, "DFrame")) ){
stop("'data' must be data.frame or coarcible to one.", call. = FALSE)
}
if( ncol(x) != nrow(data) ){
stop("Number of columns in 'x' should match with number of rows in ",
"'data'.", call. = FALSE)
}
#
res <- .calculate_rda(
x, formula = formula, data = data, ord.method = "CCA", ...)
return(res)
})
#' @export
#' @rdname runCCA
setMethod("getCCA", "SummarizedExperiment",
function(
x, formula = NULL, col.var = variables, variables = NULL,
test.signif = TRUE, assay.type = assay_name, assay_name = exprs_values,
exprs_values = "counts", ...){
############################# Input check ##############################
if( !(is.null(formula) || is(formula, "formula")) ){
stop("'formula' must be formula or NULL.", call. = FALSE)
}
if( !(is.null(col.var) ||
(is.character(col.var) &&
all(col.var %in% colnames(colData(x))))) ){
stop("'col.var' must specify column from colData(x) or be NULL.",
call. = FALSE)
}
if( !is.null(formula) && !is.null(col.var) ){
stop("Specify either 'formula' or 'col.var'.", call. = FALSE)
}
.check_assay_present(assay.type, x)
if( !.is_a_bool(test.signif) ){
stop("'test.signif' must be TRUE or FALSE.", call. = FALSE)
}
########################### Input check end ############################
# Get assay
mat <- assay(x, assay.type)
# Get formula and variables
temp <- .get_formula_and_covariates(x, formula, col.var)
formula <- temp[["formula"]]
covariates <- temp[["variables"]]
# Calculate CCA
cca <- getCCA(mat, formula = formula, data = covariates, ...)
# Test significance if specified
if( test.signif ){
res <- .test_rda(mat, attr(cca, "obj"), covariates, ...)
attr(cca, "significance") <- res
}
return(cca)
}
)
#' @export
#' @rdname runCCA
#' @importFrom SingleCellExperiment reducedDim<-
setMethod("addCCA", "SingleCellExperiment",
function(x, altexp = NULL, name = "CCA", ...){
############################# Input check ##############################
if( !(is.null(altexp) ||
(length(altexp) == 1L && is.character(altexp) &&
all(altexp %in% altExpNames(x))) ||
(.is_an_integer(altexp) && altexp > 0 &&
altexp <= length(altExps(x))) ) ){
stop("'altexp' must specify an alternative experiment from ",
"altExp(x).", call. = FALSE)
}
########################### Input check end ############################
# Get TreeSE from altexp if specified.
if( !is.null(altexp) ){
y <- altExp(x, altexp)
} else {
y <- x
}
# Calculate CCA
cca <- getCCA(y, ...)
# Add object to reducedDim
x <- .add_object_to_reduceddim(x, cca, name = name, ...)
return(x)
}
)
############################# Exported RDA methods #############################
#' @export
#' @rdname runCCA
#' @aliases getRDA
calculateRDA <- function(x, ...){
getRDA(x, ...)
}
#' @export
#' @rdname runCCA
#' @aliases addRDA
runRDA <- function(x, ...){
addRDA(x, ...)
}
#' @export
#' @rdname runCCA
setMethod("getRDA", "ANY", function(x, formula, data, ...){
if( !is.matrix(x) ){
stop("'x' must be matrix.", call. = FALSE)
}
if( !is(formula, "formula") ){
stop("'formula' must be formula or NULL.", call. = FALSE)
}
if( !(is.data.frame(data) || is.matrix(data) || is(data, "DFrame")) ){
stop("'data' must be data.frame or coarcible to one.", call. = FALSE)
}
if( ncol(x) != nrow(data) ){
stop("Number of columns in 'x' should match with number of rows in ",
"'data'.", call. = FALSE)
}
#
res <- .calculate_rda(
x, formula = formula, data = data, ord.method = "RDA", ...)
return(res)
})
#' @export
#' @rdname runCCA
setMethod("getRDA", "SummarizedExperiment",
function(
x, formula = NULL, col.var = variables, variables = NULL,
test.signif = TRUE, assay.type = assay_name, assay_name = exprs_values,
exprs_values = "counts", ...){
############################# Input check ##############################
if( !(is.null(formula) || is(formula, "formula")) ){
stop("'formula' must be formula or NULL.", call. = FALSE)
}
if( !(is.null(col.var) ||
(is.character(col.var) &&
all(col.var %in% colnames(colData(x))))) ){
stop("'col.var' must specify column from colData(x) or be NULL.",
call. = FALSE)
}
if( !is.null(formula) && !is.null(col.var) ){
stop("Specify either 'formula' or 'col.var'.", call. = FALSE)
}
.check_assay_present(assay.type, x)
if( !.is_a_bool(test.signif) ){
stop("'test.signif' must be TRUE or FALSE.", call. = FALSE)
}
########################### Input check end ############################
# Get assay
mat <- assay(x, assay.type)
# Get formula and variables
temp <- .get_formula_and_covariates(x, formula, col.var)
formula <- temp[["formula"]]
covariates <- temp[["variables"]]
# Calculate CCA
rda <- getRDA(mat, formula = formula, data = covariates, ...)
# Test significance if specified
if( test.signif ){
res <- .test_rda(mat, attr(rda, "obj"), covariates, ...)
attr(rda, "significance") <- res
}
return(rda)
}
)
#' @export
#' @rdname runCCA
#' @importFrom SingleCellExperiment reducedDim<-
setMethod("addRDA", "SingleCellExperiment",
function(x, altexp = NULL, name = "RDA", ...){
############################# Input check ##############################
if( !(is.null(altexp) ||
(length(altexp) == 1L && is.character(altexp) &&
all(altexp %in% altExpNames(x))) ||
(.is_an_integer(altexp) && altexp > 0 &&
altexp <= length(altExps(x))) ) ){
stop("'altexp' must specify an alternative experiment from ",
"altExp(x).", call. = FALSE)
}
########################### Input check end ############################
# Get TreeSE from altexp if specified.
if( !is.null(altexp) ){
y <- altExp(x, altexp)
} else {
y <- x
}
# Calculate RDA
rda <- getRDA(y, ...)
# Add object to reducedDim
x <- .add_object_to_reduceddim(x, rda, name = name, ...)
return(x)
}
)
################################ HELP FUNCTIONS ################################
# THis function creates a formula and covariate table that can be then
# forwarded to subsequent functions. The formula/variables are created based
# on usr-specified formual or variables. If neither is specified, the function
# returns empty table with corresponding formula.
.get_formula_and_covariates <- function(x, formula, col.var){
# If formula is specified
if( !is.null(formula) ){
# Otherwise if formula is provided, get variables based on formula
variables <- .get_variables_based_on_formula(x, formula)
} else if( !is.null(col.var) ){
# If column variables are specified, create a formula based on them and
# get variables
formula <- as.formula(
paste0("mat ~ `", paste(col.var, collapse = "` + `"), "`"))
# Get the data from colData
variables <- colData(x)[ , col.var, drop = FALSE]
} else{
# Else either formula nor column variables were specified. Get empty
# variable table and formula that does not incorporate covariates.
variables <- colData(x)[, 0]
formula <- mat ~ 1
}
# Return a list that holds both formula and covariates
res <- list(formula = formula, variables = variables)
return(res)
}
# This function fetch variables from colData based on formula. If formula
# was not specified, the function returns an empty table.
#' @importFrom stats terms
#' @importFrom SummarizedExperiment colData
.get_variables_based_on_formula <- function(x, formula){
# If user specified "all" covariates, give error. User should use col.var
# to do that.
if( as.character(formula)[[3]] == "." ){
stop("To incorporate all variables from colData(x) with 'x~.', use ",
"'col.var' and leave 'formula' unspecified.", call. = FALSE)
}
# Get variables from formula
terms <- rownames(attr(terms(formula), "factors"))
terms <- terms[terms != as.character(formula)[2L]]
terms <- .remove_special_functions_from_terms(terms)
# Check that all variables specify a column from colData
if( !all(terms %in% colnames(colData(x))) ){
stop("All variables on the right hand side of 'formula' must be ",
"present in colData(x).", call. = FALSE)
}
# Get the variables from colData
df <- colData(x)[, terms, drop = FALSE]
return(df)
}
# This function parses right-hand side formula so that it now includes only
# the covariates.
.remove_special_functions_from_terms <- function(terms){
names(terms) <- terms
m <- regexec("^Condition\\(([^\\(\\)]*)\\)$|^([^\\(\\)]*)$", terms)
m <- regmatches(terms, m)
terms <- vapply(m, function(n){
n <- n[seq.int(2L,length(n))]
n <- n[n != ""]
return(n)
}, character(1))
return(terms)
}
# This function performs dbRDA or CCA. It returns side scores with other
# information scores in attributes.
#' @importFrom stats as.formula na.fail
#' @importFrom vegan cca dbrda sppscores<- eigenvals scores
.calculate_rda <- function(
x, formula, data, scores, scale = TRUE, na.action = na.fail,
method = distance, distance = "euclidean", ord.method = "CCA", ...){
# input check
if(!.is_a_bool(scale)){
stop("'scale' must be TRUE or FALSE.", call. = FALSE)
}
if( !.is_a_string(method) ){
stop("'method' must be a single string value.", call. = FALSE)
}
if( !(.is_a_string(ord.method) && ord.method %in% c("CCA", "RDA")) ){
stop("'ord.method' must be 'CCA' or 'RDA'.", call. = FALSE)
}
#
# Get data in correct orientation. Samples should be in rows in abundance
# table.
x <- as.matrix(t(x))
data <- data.frame(data, check.names = FALSE)
# Instead of letting na.action pass through, give informative error
# about missing values.
if( any(is.na(data)) && isTRUE(all.equal(na.action, na.fail)) ){
stop("Variables contain missing values. Set na.action to na.exclude",
" to remove samples with missing values.", call. = FALSE)
}
# Ensure that the left hand side of the formula points to abundance matrix
formula <- as.character(formula)
formula[[2]] <- "x"
# Create a formula from string
formula <- as.formula(
paste(as.character(formula)[c(2,1,3)], collapse = " "))
# Initialize an argument list with common arguments
args <- c(
list(formula = formula, data = data, na.action = na.action), list(...))
# vegan::cca function has a scale parameter that is not in dbrda
if( ord.method == "CCA" ){
args <- c(args, list(scale = scale))
}
# vegan::dbRDA has additional distance parameter that specifies
# dissimilarity metric
if( ord.method == "RDA" ){
args <- c(args, list(distance = method))
}
# Perform CCA or RDA
ord_FUN <- if (ord.method == "CCA") cca else dbrda
res_obj <- do.call(ord_FUN, args)
# The function stores the call. However, as we use do.call(), the function
# sores the call incorrectly (e.g., it prints whole abundance table). That
# is why we modify the call to have only necessary information.
na_action_name <- deparse(substitute(na.action))
na_action_name <- paste(na_action_name, collapse = " ")
na_action_name <- sub('.*UseMethod\\("([^"]+)"\\).*', '\\1', na_action_name)
res_obj$call <- paste0(
res_obj$method, "(formula = ", deparse(formula),
", data = data",
ifelse(ord.method == "CCA", paste0(", scale = ", scale), ""),
ifelse(ord.method == "RDA", paste0(", distance = ", method), ""),
", na.action = ", na_action_name, ", ...)")
# Add species scores to rda object since they are missing
# (in cca they are included). If we used na.action, some samples might be
# removed. That is why we have to subset the abundance table first by
# filtering missing values.
if( ord.method == "RDA" ){
if( !is.null(res_obj$na.action) ){
x <- x[-res_obj$na.action, ]
}
sppscores(res_obj) <- x
}
# Get eigenvalues from the object
eig <- eigenvals(res_obj)
# Get total number of coordinates. There might be imaginary axes, exclude
# them because otherwise error occurs in scores() call.
tot_num_coord <- sum(eig >= 0)
# Now we know how many coordinates there are. We use that info to get
# scores for all coordinates.
res_values <- vegan::scores(res_obj, seq_len(tot_num_coord))
# Get the site scores. They represent samples' coordinates in ordinated
# space.
res_mat <- res_values[["sites"]]
# Add other values as attributes
res_values <- c(res_values, list(eig = eig))
attributes(res_mat) <- c(
attributes(res_mat), list(obj = res_obj), res_values)
return(res_mat)
}
# Perform PERMANOVA and homogeneity analysis to RDA/CCA object
#' @importFrom vegan anova.cca betadisper
#' @importFrom stats anova
.test_rda <- function(mat, rda, variables, ...){
# Perform permanova for whole model and for variables
permanova_model <- anova.cca(rda, by = NULL)
if( !is.null(variables) ){
res <- .test_rda_vars(
mat, rda, variables, permanova_model, ...)
} else{
res <- list(permanova = permanova_model)
}
return(res)
}
# Test association of variables to ordination
#' @importFrom vegan anova.cca
.test_rda_vars <- function(
mat, rda, variables, permanova_model, by = "margin", full = FALSE, ...){
# Check full parameter
if( !.is_a_bool(full) ){
stop("'full' must be TRUE or FALSE.", call. = FALSE)
}
#
# Perform PERMANOVA
permanova <- anova.cca(rda, by = by, ...)
# Create a table from the results
# PERMANOVAs
table_model <- as.data.frame(permanova_model)
permanova_tab <- as.data.frame(permanova)
# Take only model results
permanova_tab <- rbind(table_model[1, ], permanova_tab)
# Add info about total variance
permanova_tab[ , "Total variance"] <- permanova_tab["Model", 2] +
permanova_tab["Residual", 2]
# Add info about explained variance
permanova_tab[ , "Explained variance"] <- permanova_tab[ , 2] /
permanova_tab[ , "Total variance"]
# Perform homogeneity analysis
homogeneity <- .calculate_homogeneity(mat, variables, full = full, ...)
# Return whole data or just a tables
permanova_res <- permanova_tab
if( full ){
permanova_res <- list(
summary = permanova_tab,
model = permanova_model,
variables = permanova)
}
res <- list(permanova = permanova_res, homogeneity = homogeneity)
return(res)
}
#' @importFrom vegan vegdist betadisper
.calculate_homogeneity <- function(
mat, variables, method = distance, distance = "euclidean",
homogeneity.test = "permanova", full = FALSE, ...){
# Check homogeneity.test
if( !(.is_a_string(homogeneity.test) &&
homogeneity.test %in% c("permanova", "anova", "tukeyhsd")) ){
stop("'homogeneity.test' must be 'permanova', 'anova', or 'tukeyhsd'.",
call. = FALSE)
}
# Check full parameter
if( !.is_a_bool(full) ){
stop("'full' must be TRUE or FALSE.", call. = FALSE)
}
# Check that there are more than one group per variable. If there are only
# one group, we cannot calculate homogeneity for those variables.
cols <- vapply(
variables, function(x) length(unique(x))>1, FUN.VALUE = logical(1))
if( any(!cols) ){
warning("The following variables contain only one group. Cannot ",
"calculate homogeneity for them: '",
paste0(names(cols)[!cols], collapse = "','"), "'", call. = FALSE)
variables <- variables[, cols, drop = FALSE]
}
#
# Calculate dissimilarity matrix
mat <- t(mat)
diss_mat <- vegdist(mat, method = method, ...)
# For all variables run the analysis
homogeneity <- lapply(colnames(variables), function(x){
# Get variable values
var <- variables[[x]]
# Run betadisper.
# Suppress possible warnings: "some squared distances are negative and
# changed to zero" Suppress possible messages:
# "missing observations due to 'group' removed"
suppressWarnings(
suppressMessages(
betadisper_res <- betadisper(diss_mat, group = var)
)
)
# Run significance test
significance <- .homogeneity_significance(
betadisper_res, homogeneity.test, ...)
# Return the results as a list
models <- list(betadisper_res, significance[["obj"]])
names(models) <- c("betadisper", homogeneity.test)
res <- list(
models = models,
table = significance[["table"]]
)
return(res)
})
names(homogeneity) <- colnames(variables)
# Get models
homogeneity_model <- lapply(homogeneity, function(x) x[["models"]])
names(homogeneity_model) <- colnames(variables)
# Get tables
homogeneity_tab <- lapply(homogeneity, function(x) x[["table"]])
# Combine tables
res <- do.call(rbind, homogeneity_tab)
# Return either only summary table or whole result including fitterd models
if( full ){
res <- list(summary = res, variables = homogeneity_model)
}
return(res)
}
# Perform statistical test for group homogeneity results
#' @importFrom stats anova TukeyHSD
#' @importFrom vegan permutest
.homogeneity_significance <- function(betadisper_res, homogeneity.test, ...){
# Run specified significance test
FUN <- switch(homogeneity.test,
"permanova" = permutest,
"anova" = anova,
"tukeyhsd" = TukeyHSD
)
res <- do.call(FUN, c(list(betadisper_res), ...))
# Get summary table from the results
if( homogeneity.test == "anova" ){
tab <- as.data.frame(res)
} else if( homogeneity.test == "tukeyhsd" ){
tab <- res[["group"]]
} else{
tab <- res[["tab"]]
}
# Modify permanova/anova table
if( homogeneity.test != "tukeyhsd" ){
# Add info about total variance
tab[ , "Total variance"] <- tab["Groups", "Sum Sq"] +
tab["Residuals", "Sum Sq"]
# Add info about explained variance
tab[ , "Explained variance"] <- tab[ , "Sum Sq"] /
tab[ , "Total variance"]
# Get only groups row (drop residuals row)
tab <- tab[1, ]
}
# Create list from the object and results
res <- list(
obj = res,
table = tab
)
return(res)
}
# Add RDA/CCA to reducedDim
.add_object_to_reduceddim <- function(
tse, rda, name, subset.result = TRUE, ...){
# Test subset
if( !.is_a_bool(subset.result) ){
stop("'subset.result' must be TRUE or FALSE.", call. = FALSE)
}
#
# If samples do not match / there were samples without appropriate metadata
# and they are now removed
if( !all(colnames(tse) %in% rownames(rda)) && subset.result ){
# Take a subset
tse <- tse[ , rownames(rda) ]
# Give a message
warning(
"Certain samples are removed from the result because they did ",
"not include sufficient metadata.", call. = FALSE)
} else if( !all(colnames(tse) %in% rownames(rda)) && !subset.result ){
# If user do not want to subset the data
# Save attributes from the object
attr <- attributes(rda)
attr <- attr[ !names(attr) %in% c("dim", "dimnames")]
# Find samples that are removed
samples_not_found <- setdiff(colnames(tse), rownames(rda))
# Create an empty matrix
mat <- matrix(nrow = length(samples_not_found), ncol=ncol(rda))
rownames(mat) <- samples_not_found
# Combine the data and order it in correct order
rda <- rbind(rda, mat)
rda <- rda[colnames(tse), ]
# Add attributes
attr <- c(attributes(rda), attr)
attributes(rda) <- attr
}
# Add object to reducedDIm
reducedDim(tse, name) <- rda
return(tse)
}