From 4bceb8e97a57cd4bc555b1a57aae5b6c9e5a4840 Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Fri, 4 Nov 2022 11:38:12 +0000 Subject: [PATCH 1/9] refac: remove commented code --- R/treeview.R | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/R/treeview.R b/R/treeview.R index 984ce5a..fa8b402 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -300,7 +300,6 @@ treeview <- function(e0, round(lgr * 100), "%" ), `Mol clock outlier` = clock_outlier, - # `Structure Z` = treestructure_z, `Lineages` = lineages ) ) @@ -384,19 +383,18 @@ treeview <- function(e0, gtr1.1$data$defmuts <- ttdefmuts gtr1.1$data$allmuts <- ttallmuts if (!is.null(mut_regex)) { - # gtr1.1$data$label <- '' for (mre in mut_regex) { i <- which(grepl(gtr1.1$data$allmuts, pattern = mre)) - # gtr1.1$data$label[i] <- '*' gtr1.1$data[[mre]] <- grepl(gtr1.1$data$allmuts, pattern = mre) } - # gtr1.1 <- gtr1.1 + geom_tiplab( size = 16, colour = 'red' ) } - genotype <- as.data.frame(gtr1.1$data[ - gtr1.1$data$node <= ape::Ntip(tr2), - c("label", mut_regex) - ]) + genotype <- as.data.frame( + gtr1.1$data[ + gtr1.1$data$node <= ape::Ntip(tr2), + c("label", mut_regex) + ] + ) rownames(genotype) <- genotype$label genotype <- genotype[, -1, drop = FALSE] @@ -412,7 +410,8 @@ treeview <- function(e0, ) }) gtr1.1$data$colour_var <- gtr1.1$data[[vn]] - gtr1.2 <- ggtree::gheatmap(gtr1.1, + gtr1.2 <- ggtree::gheatmap( + gtr1.1, genotype, width = heatmap_width, offset = 0.0005, @@ -423,15 +422,17 @@ treeview <- function(e0, ) gtr1.3 <- gtr1.2 + - ggiraph::geom_point_interactive(ggplot2::aes( - x = .data$x, - y = .data$y, - color = .data$colour_var, - tooltip = .data$mouseover, - data_id = .data$node, - size = .data$cluster_size + 1, - shape = as.factor(.data$internal) - )) + + ggiraph::geom_point_interactive( + ggplot2::aes( + x = .data$x, + y = .data$y, + color = .data$colour_var, + tooltip = .data$mouseover, + data_id = .data$node, + size = .data$cluster_size + 1, + shape = as.factor(.data$internal) + ) + ) + ggplot2::scale_shape_manual( name = NULL, labels = NULL, @@ -458,11 +459,13 @@ treeview <- function(e0, height_svg = max(14, floor(ape::Ntip(tr2) / 10)) ) - htmlwidgets::saveWidget(pgtr1.3, + htmlwidgets::saveWidget( + pgtr1.3, file = as.character(glue::glue("{output_dir}/tree-{vn}.html")), title = glue::glue("SARS CoV 2 scan {Sys.Date()}") ) - file.copy(as.character(glue::glue("{output_dir}/tree-{vn}.html")), + file.copy( + as.character(glue::glue("{output_dir}/tree-{vn}.html")), as.character(glue::glue("{output_dir}/tree-{vn}-{Sys.Date()}.html")), overwrite = TRUE ) From 2ad7b090994cc63a00d6c22b330124c6d0192042 Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Wed, 2 Nov 2022 15:17:16 +0000 Subject: [PATCH 2/9] refac: extract function for making noninteractive ggtree objects from plot_tree --- R/plot_tree.R | 73 +++++++++++++++++++++++++++++ R/treeview.R | 61 +++++------------------- man/create_noninteractive_ggtree.Rd | 43 +++++++++++++++++ 3 files changed, 128 insertions(+), 49 deletions(-) create mode 100644 R/plot_tree.R create mode 100644 man/create_noninteractive_ggtree.Rd diff --git a/R/plot_tree.R b/R/plot_tree.R new file mode 100644 index 0000000..36dc90e --- /dev/null +++ b/R/plot_tree.R @@ -0,0 +1,73 @@ +#' Create a non-interactive \code{ggtree} object for presenting mutation and lineage data +#' +#' @param ggtree_data Tree data for passing to \code{ggtree}. +#' @param branch_col Scalar string. The name of a column within \code{ggtree_data} for which +#' the \code{ggtree} object should be produced. +#' @param lins String. Vector of lineages that are under study. +#' @param lin_nodes Integer. Vector of node numbers. The nodes are defined in +#' \code{ggtree_data$node}. The order of entries matches that for \code{lins}. +#' @param lin_node_names String. Name of the lineage. The order of entries matches that for +#' \code{lins}. +#' @param shapes Shapes for the branches and leaves in the tree. +#' @param colours Vector of colours. +#' @param colour_limits Min and max values for the colours. +#' +#' @return A \code{ggtree} object. + +create_noninteractive_ggtree <- function(ggtree_data, + branch_col, + lins, + lin_nodes, + lin_node_names, + shapes, + colours, + colour_limits) { + gtr1 <- ggtree::ggtree( + ggtree_data, + ggplot2::aes_string(colour = branch_col), + ladderize = TRUE, + right = TRUE, + continuous = TRUE + ) + + gtr1.1 <- gtr1 + + ggplot2::scale_color_gradientn( + name = gsub(branch_col, pattern = "_", replacement = " "), + colours = colours, + limits = colour_limits, + oob = scales::squish + ) + + ggplot2::geom_point( + ggplot2::aes_string( + color = branch_col, + size = "cluster_size", + shape = "as.factor(internal)" + ), + data = gtr1$data + ) + + ggplot2::scale_shape_manual( + name = NULL, + labels = NULL, + values = shapes + ) + + ggplot2::scale_size( + name = "Cluster size", + range = c(2, 16) + ) + + ggplot2::ggtitle(glue::glue("{Sys.Date()}, colour: {branch_col}")) + + ggplot2::theme(legend.position = "top") + + for (i in seq_along(lins)) { + if (!is.na(lin_nodes[i])) { + gtr1.1 <- gtr1.1 + + ggtree::geom_cladelabel( + node = lin_nodes[i], + label = lin_node_names[i], + offset = .00001, + colour = "black" + ) + } + } + + gtr1.1 +} diff --git a/R/treeview.R b/R/treeview.R index fa8b402..9faab97 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -220,60 +220,23 @@ treeview <- function(e0, if (is.null(colour_limits)) { colour_limits <- range(td[[vn]]) } - gtr1 <- ggtree::ggtree( - dplyr::full_join(tr2, - td, - by = "node" - ), - ggplot2::aes_string(colour = vn), - ladderize = TRUE, - right = TRUE, - continuous = TRUE - ) - shapes <- c( Y = "\U2B24", N = "\U25C4" ) - gtr1.1 <- gtr1 + - ggplot2::scale_color_gradientn( - name = gsub(vn, pattern = "_", replacement = " "), - colours = cols, - limits = colour_limits, - oob = scales::squish - ) + - ggplot2::geom_point( - ggplot2::aes_string( - color = vn, - size = "cluster_size", - shape = "as.factor(internal)" - ), - data = gtr1$data - ) + - ggplot2::scale_shape_manual( - name = NULL, - labels = NULL, - values = shapes - ) + - ggplot2::scale_size( - name = "Cluster size", - range = c(2, 16) - ) + - ggplot2::ggtitle(glue::glue("{Sys.Date()}, colour: {vn}")) + - ggplot2::theme(legend.position = "top") - - for (i in seq_along(lins)) { - if (!is.na(lin_nodes[i])) { - gtr1.1 <- gtr1.1 + - ggtree::geom_cladelabel( - node = lin_nodes[i], - label = lin_node_names[i], - offset = .00001, - colour = "black" - ) - } - } + ggtree_data <- dplyr::full_join(tr2, td, by = "node") + + gtr1.1 <- create_noninteractive_ggtree( + ggtree_data = ggtree_data, + branch_col = vn, + lins = lins, + lin_nodes = lin_nodes, + lin_node_names = lin_node_names, + shapes = shapes, + colours = cols, + colour_limits = colour_limits + ) ggplot2::ggsave( gtr1.1, diff --git a/man/create_noninteractive_ggtree.Rd b/man/create_noninteractive_ggtree.Rd new file mode 100644 index 0000000..e5b6dde --- /dev/null +++ b/man/create_noninteractive_ggtree.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_tree.R +\name{create_noninteractive_ggtree} +\alias{create_noninteractive_ggtree} +\title{Create a non-interactive \code{ggtree} object for presenting mutation and lineage data} +\usage{ +create_noninteractive_ggtree( + ggtree_data, + branch_col, + lins, + lin_nodes, + lin_node_names, + shapes, + colours, + colour_limits +) +} +\arguments{ +\item{ggtree_data}{Tree data for passing to \code{ggtree}.} + +\item{branch_col}{Scalar string. The name of a column within \code{ggtree_data} for which +the \code{ggtree} object should be produced.} + +\item{lins}{String. Vector of lineages that are under study.} + +\item{lin_nodes}{Integer. Vector of node numbers. The nodes are defined in +\code{ggtree_data$node}. The order of entries matches that for \code{lins}.} + +\item{lin_node_names}{String. Name of the lineage. The order of entries matches that for +\code{lins}.} + +\item{shapes}{Shapes for the branches and leaves in the tree.} + +\item{colours}{Vector of colours.} + +\item{colour_limits}{Min and max values for the colours.} +} +\value{ +A \code{ggtree} object. +} +\description{ +Create a non-interactive \code{ggtree} object for presenting mutation and lineage data +} From ac08a09cea7c18b4cc9f07d08fee483a2f394a11 Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Wed, 2 Nov 2022 16:04:45 +0000 Subject: [PATCH 3/9] refac: extract function for sorting mutation strings from .plot_tree --- R/plot_tree.R | 33 +++++++++++++++++++++++++++++++++ R/treeview.R | 37 ++++++++++--------------------------- inst/WORDLIST | 3 +++ man/sort_mutations.Rd | 20 ++++++++++++++++++++ 4 files changed, 66 insertions(+), 27 deletions(-) create mode 100644 man/sort_mutations.Rd diff --git a/R/plot_tree.R b/R/plot_tree.R index 36dc90e..6f7aa7c 100644 --- a/R/plot_tree.R +++ b/R/plot_tree.R @@ -71,3 +71,36 @@ create_noninteractive_ggtree <- function(ggtree_data, gtr1.1 } + +#' Sorts a vector of mutations +#' +#' @param muts String. Vector of mutations. Each string must be a separate mutation +#' (e.g., "S:A243del"). The mutations have a prefix ("S:", "N:") and a positional description of +#' the protein-level mutation ("T205I" for Thr to Ile mutation at position 205). +#' +#' @return String. A vector of the same length as \code{muts}. The mutations are sorted by prefix +#' and then by the location of the mutation. + +sort_mutations <- function(muts) { + if (length(muts) == 0) { + return("") + } + pre <- sapply(strsplit(muts, split = ":"), "[", 1) + upres <- sort(unique(pre)) + sorted_mutations <- do.call(c, lapply(upres, function(.pre) { + .muts <- muts[pre == .pre] + .muts1 <- sapply(strsplit(.muts, + split = ":" + ), "[", 2) + sites <- regmatches( + .muts1, + regexpr(.muts1, + pattern = "[0-9]+" + ) + ) + o <- order(as.numeric(sites)) + .muts[o] + })) + + sorted_mutations +} diff --git a/R/treeview.R b/R/treeview.R index 9faab97..7463c1e 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -273,44 +273,26 @@ treeview <- function(e0, ) }) - ## table with geo composition ttregtabs <- gtr1.1$data$region_summary # ## cocirc ttcocirc <- gtr1.1$data$cocirc_summary # - ## defining muts - .sort.muts <- function(muts) { - if (length(muts) == 0) { - return("") - } - pre <- sapply(strsplit(muts, split = ":"), "[", 1) - upres <- sort(unique(pre)) - do.call(c, lapply(upres, function(.pre) { - .muts <- muts[pre == .pre] - .muts1 <- sapply(strsplit(.muts, - split = ":" - ), "[", 2) - sites <- regmatches( - .muts1, - regexpr(.muts1, - pattern = "[0-9]+" - ) - ) - o <- order(as.numeric(sites)) - .muts[o] - })) - } + ## defining muts ttdefmuts <- sapply(match(gtr1.1$data$cluster_id, sc0$cluster_id), function(isc0) { if (is.na(isc0)) { return("") } paste( - sep = "\n", "Cluster branch mutations:", + sep = "\n", + "Cluster branch mutations:", gsub( x = tryCatch( stringr::str_wrap( - paste(collapse = " ", .sort.muts(cmuts[[as.character(sc0$node_number[isc0])]]$defining)), + paste( + collapse = " ", + sort_mutations(cmuts[[as.character(sc0$node_number[isc0])]]$defining) + ), width = 60 ), error = function(e) browser() @@ -327,12 +309,13 @@ treeview <- function(e0, return("") } paste( - sep = "\n", "All mutations:", + sep = "\n", + "All mutations:", gsub( x = stringr::str_wrap( paste( collapse = " ", - .sort.muts(cmuts[[as.character(sc0$node_number[isc0])]]$all) + sort_mutations(cmuts[[as.character(sc0$node_number[isc0])]]$all) ), width = 60 ), diff --git a/inst/WORDLIST b/inst/WORDLIST index 26f77cd..f055687 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -7,6 +7,7 @@ colour Config CoV cpu +del dev dichotomised doMPI @@ -19,6 +20,7 @@ ggplot ggtree htmlwidgets ide +Ile knitr libopenmpi lubridate @@ -50,6 +52,7 @@ summarised SystemRequirements testthat tfpscanner +Thr tis tooltips treedata diff --git a/man/sort_mutations.Rd b/man/sort_mutations.Rd new file mode 100644 index 0000000..b4edba1 --- /dev/null +++ b/man/sort_mutations.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_tree.R +\name{sort_mutations} +\alias{sort_mutations} +\title{Sorts a vector of mutations} +\usage{ +sort_mutations(muts) +} +\arguments{ +\item{muts}{String. Vector of mutations. Each string must be a separate mutation +(e.g., "S:A243del"). The mutations have a prefix ("S:", "N:") and a positional description of +the protein-level mutation ("T205I" for Thr to Ile mutation at position 205).} +} +\value{ +String. A vector of the same length as \code{muts}. The mutations are sorted by prefix + and then by the location of the mutation. +} +\description{ +Sorts a vector of mutations +} From ca9f11d4d71cc71f794872b0fc71133f5f087469 Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Thu, 3 Nov 2022 16:17:26 +0000 Subject: [PATCH 4/9] refac: function to add heatmap to ggtree object --- R/plot_tree.R | 24 ++++++++++++++++++++++++ R/treeview.R | 15 ++++++--------- man/append_heatmap.Rd | 21 +++++++++++++++++++++ 3 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 man/append_heatmap.Rd diff --git a/R/plot_tree.R b/R/plot_tree.R index 6f7aa7c..524ba17 100644 --- a/R/plot_tree.R +++ b/R/plot_tree.R @@ -72,6 +72,30 @@ create_noninteractive_ggtree <- function(ggtree_data, gtr1.1 } +#' Adds a heatmap to the right of a ggtree object +#' +#' @param ggobj A ggtree object. +#' @param genotype The heatmap data. +#' @param heatmap_width,heatmap_lab_offset Parameters for positioning of the heatmap. +#' +#' @return A \code{ggtree} / \code{gg} / \code{ggplot} object with an appended heatmap. + +append_heatmap <- function(ggobj, + genotype, + heatmap_width = 1, + heatmap_lab_offset = 0) { + ggtree::gheatmap( + p = ggobj, + data = genotype, + width = heatmap_width, + offset = 0.0005, + colnames_angle = -90, + colnames_position = "top", + colnames_offset_y = heatmap_lab_offset, + legend_title = "Genotype" + ) +} + #' Sorts a vector of mutations #' #' @param muts String. Vector of mutations. Each string must be a separate mutation diff --git a/R/treeview.R b/R/treeview.R index 7463c1e..ffd7879 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -356,15 +356,12 @@ treeview <- function(e0, ) }) gtr1.1$data$colour_var <- gtr1.1$data[[vn]] - gtr1.2 <- ggtree::gheatmap( - gtr1.1, - genotype, - width = heatmap_width, - offset = 0.0005, - colnames_angle = -90, - colnames_position = "top", - colnames_offset_y = heatmap_lab_offset, - legend_title = "Genotype" + + gtr1.2 <- append_heatmap( + ggobj = gtr1.1, + genotype = genotype, + heatmap_width = heatmap_width, + heatmap_lab_offset = heatmap_lab_offset ) gtr1.3 <- gtr1.2 + diff --git a/man/append_heatmap.Rd b/man/append_heatmap.Rd new file mode 100644 index 0000000..de676dd --- /dev/null +++ b/man/append_heatmap.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_tree.R +\name{append_heatmap} +\alias{append_heatmap} +\title{Adds a heatmap to the right of a ggtree object} +\usage{ +append_heatmap(ggobj, genotype, heatmap_width = 1, heatmap_lab_offset = 0) +} +\arguments{ +\item{ggobj}{A ggtree object.} + +\item{genotype}{The heatmap data.} + +\item{heatmap_width, heatmap_lab_offset}{Parameters for positioning of the heatmap.} +} +\value{ +A \code{ggtree} / \code{gg} / \code{ggplot} object with an appended heatmap. +} +\description{ +Adds a heatmap to the right of a ggtree object +} From 5b5dae698c03db2279d5834a09643c9eeb2e58d8 Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Thu, 3 Nov 2022 16:46:25 +0000 Subject: [PATCH 5/9] refac: function for creating interactive ggtree object --- R/plot_tree.R | 50 ++++++++++++++++++++++++++++++++ R/treeview.R | 39 +++++-------------------- man/create_interactive_ggtree.Rd | 37 +++++++++++++++++++++++ 3 files changed, 95 insertions(+), 31 deletions(-) create mode 100644 man/create_interactive_ggtree.Rd diff --git a/R/plot_tree.R b/R/plot_tree.R index 524ba17..bda03d2 100644 --- a/R/plot_tree.R +++ b/R/plot_tree.R @@ -96,6 +96,56 @@ append_heatmap <- function(ggobj, ) } +#' Converts a \code{ggtree} object into a \code{ggiraph} object with interactive potential +#' +#' @param ggobj A \code{ggtree} object. +#' @param branch_col Scalar string. Name of the column in \code{ggobj$data} that we are +#' creating the plot for. +#' @param cluster_size_range Numeric (length-2). min and max values for cluster sizes on the +#' chart. +#' @inheritParams create_noninteractive_ggtree +#' +#' @return A \code{ggtree} object with interactive data for presentation by +#' \code{ggiraph::girafe}. + +create_interactive_ggtree <- function(ggobj, + branch_col, + cluster_size_range, + shapes, + colours, + colour_limits) { + ggobj + + ggiraph::geom_point_interactive( + ggplot2::aes( + x = .data$x, + y = .data$y, + color = .data$colour_var, + tooltip = .data$mouseover, + data_id = .data$node, + size = .data$cluster_size + 1, + shape = as.factor(.data$internal) + ) + ) + + ggplot2::scale_shape_manual( + name = NULL, + labels = NULL, + values = shapes + ) + + ggplot2::scale_size( + name = "Cluster size", + range = cluster_size_range + ) + + ggplot2::scale_color_gradientn( + name = stringr::str_to_title( + gsub(branch_col, pattern = "_", replacement = " ") + ), + colours = colours, + limits = colour_limits, + oob = scales::squish + ) + + ggplot2::theme(legend.position = "top") +} + #' Sorts a vector of mutations #' #' @param muts String. Vector of mutations. Each string must be a separate mutation diff --git a/R/treeview.R b/R/treeview.R index ffd7879..4355ea9 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -364,37 +364,14 @@ treeview <- function(e0, heatmap_lab_offset = heatmap_lab_offset ) - gtr1.3 <- gtr1.2 + - ggiraph::geom_point_interactive( - ggplot2::aes( - x = .data$x, - y = .data$y, - color = .data$colour_var, - tooltip = .data$mouseover, - data_id = .data$node, - size = .data$cluster_size + 1, - shape = as.factor(.data$internal) - ) - ) + - ggplot2::scale_shape_manual( - name = NULL, - labels = NULL, - values = shapes - ) + - ggplot2::scale_size( - name = "Cluster size", - range = c(2, 16) - ) + - ggplot2::scale_color_gradientn( - name = stringr::str_to_title(gsub(vn, - pattern = "_", - replacement = " " - )), - colours = cols, - limits = colour_limits, - oob = scales::squish - ) + - ggplot2::theme(legend.position = "top") + gtr1.3 <- create_interactive_ggtree( + gtr1.2, + branch_col = vn, + cluster_size_range = c(2, 16), + shapes = shapes, + colours = cols, + colour_limits = colour_limits + ) pgtr1.3 <- create_widget( ggobj = gtr1.3, diff --git a/man/create_interactive_ggtree.Rd b/man/create_interactive_ggtree.Rd new file mode 100644 index 0000000..cb004b7 --- /dev/null +++ b/man/create_interactive_ggtree.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_tree.R +\name{create_interactive_ggtree} +\alias{create_interactive_ggtree} +\title{Converts a \code{ggtree} object into a \code{ggiraph} object with interactive potential} +\usage{ +create_interactive_ggtree( + ggobj, + branch_col, + cluster_size_range, + shapes, + colours, + colour_limits +) +} +\arguments{ +\item{ggobj}{A \code{ggtree} object.} + +\item{branch_col}{Scalar string. Name of the column in \code{ggobj$data} that we are +creating the plot for.} + +\item{cluster_size_range}{Numeric (length-2). min and max values for cluster sizes on the +chart.} + +\item{shapes}{Shapes for the branches and leaves in the tree.} + +\item{colours}{Vector of colours.} + +\item{colour_limits}{Min and max values for the colours.} +} +\value{ +A \code{ggtree} object with interactive data for presentation by + \code{ggiraph::girafe}. +} +\description{ +Converts a \code{ggtree} object into a \code{ggiraph} object with interactive potential +} From 024e19e7a9a7a5e6bb30d6e20e4adbe48d811eed Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Fri, 4 Nov 2022 11:45:20 +0000 Subject: [PATCH 6/9] refac: move def of genotype after modification to ggtree object --- R/treeview.R | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/R/treeview.R b/R/treeview.R index 4355ea9..4f5b25f 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -335,18 +335,10 @@ treeview <- function(e0, } } - genotype <- as.data.frame( - gtr1.1$data[ - gtr1.1$data$node <= ape::Ntip(tr2), - c("label", mut_regex) - ] - ) - rownames(genotype) <- genotype$label - genotype <- genotype[, -1, drop = FALSE] - # make html widget gtr1.1$data$mouseover <- sapply(seq_along(ttdfs), function(i) { - paste0("Statistics:\n", ttdfs[i], + paste0( + "Statistics:\n", ttdfs[i], "\n\nGeography:\n", ttregtabs[i], "\n\nCo-circulating with:\n", ttcocirc[i], "\n\n", ttdefmuts[i], @@ -357,6 +349,15 @@ treeview <- function(e0, }) gtr1.1$data$colour_var <- gtr1.1$data[[vn]] + genotype <- as.data.frame( + gtr1.1$data[ + gtr1.1$data$node <= ape::Ntip(tr2), + c("label", mut_regex) + ] + ) + rownames(genotype) <- genotype$label + genotype <- genotype[, -1, drop = FALSE] + gtr1.2 <- append_heatmap( ggobj = gtr1.1, genotype = genotype, @@ -394,11 +395,13 @@ treeview <- function(e0, } message("Generating figures") - - pl <- suppressWarnings(.plot_tree("logistic_growth_rate", - mut_regex = mutations, - colour_limits = c(-.5, .5) - )) + pl <- suppressWarnings( + .plot_tree( + "logistic_growth_rate", + mut_regex = mutations, + colour_limits = c(-.5, .5) + ) + ) pldf <- pl$data for (vn in setdiff(branch_cols, c("logistic_growth_rate"))) { suppressWarnings(.plot_tree(vn, mut_regex = mutations)) From 1e546b2fa11cc93313a0970a33c5db3cb6ebdd38 Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Fri, 4 Nov 2022 12:33:44 +0000 Subject: [PATCH 7/9] refac: extract function for adding interactivity data to a ggtree object --- R/plot_tree.R | 125 +++++++++++++++++++++++++++++++ R/treeview.R | 109 ++------------------------- man/append_interactivity_data.Rd | 26 +++++++ 3 files changed, 158 insertions(+), 102 deletions(-) create mode 100644 man/append_interactivity_data.Rd diff --git a/R/plot_tree.R b/R/plot_tree.R index bda03d2..4606a98 100644 --- a/R/plot_tree.R +++ b/R/plot_tree.R @@ -72,6 +72,131 @@ create_noninteractive_ggtree <- function(ggtree_data, gtr1.1 } +#' Adds data to a ggtree object to allow mouse-over tooltips etc when presented interactively +#' +#' @param ggobj A \code{ggtree} object. +#' @param branch_col Scalar string. The name of a column within \code{ggobj$data} defining the +#' statistic under study here (`logistic_growth_rate`, `clock_outlier`). +#' @param sc0,cmuts Data-frames. +#' @param mut_regex Regular expression. Defines the mutations under study here. +#' +#' @return A ggtree object. The \code{$data} entry has additional entries (\code{mouseover}, +#' \code{colour_var}, \code{defmuts}, \code{allmuts}) that are used when presented interactively +#' by \code{ggiraph}. + +append_interactivity_data <- function(ggobj, + branch_col, + sc0, + cmuts, + mut_regex) { + # make mouseover info + ## standard meta data + ttdfs <- apply(ggobj$data, 1, FUN = function(x) { + z <- as.list(x) + lgr <- as.numeric(z$logistic_growth_rate) + # TODO: replace with() with explicit z$cluster_id etc + y <- with( + z, + data.frame( + `Cluster ID` = glue::glue("#{cluster_id}"), + `Cluster size` = cluster_size, + `Date range` = date_range, + `Example sequence` = label, + `Logistic growth` = paste0( + ifelse(lgr > 0, "+", ""), + round(lgr * 100), "%" + ), + `Mol clock outlier` = clock_outlier, + `Lineages` = lineages + ) + ) + y <- t(y) + colnames(y) <- "" + tryCatch( + paste(knitr::kable(y, "simple"), collapse = "\n"), + error = function(e) paste(knitr::kable(y, "markdown"), collapse = "\n") + ) + }) + + ## table with geo composition + ttregtabs <- ggobj$data$region_summary # + ## cocirc + ttcocirc <- ggobj$data$cocirc_summary # + + ## defining muts + ttdefmuts <- sapply(match(ggobj$data$cluster_id, sc0$cluster_id), function(isc0) { + if (is.na(isc0)) { + return("") + } + paste( + sep = "\n", + "Cluster branch mutations:", + gsub( + x = tryCatch( + stringr::str_wrap( + paste( + collapse = " ", + sort_mutations(cmuts[[as.character(sc0$node_number[isc0])]]$defining) + ), + width = 60 + ), + error = function(e) browser() + ), + pattern = " ", + replacement = ", " + ), + "\n" + ) + }) # end of sapply + + ttallmuts <- sapply(match(ggobj$data$cluster_id, sc0$cluster_id), function(isc0) { + if (is.na(isc0)) { + return("") + } + paste( + sep = "\n", + "All mutations:", + gsub( + x = stringr::str_wrap( + paste( + collapse = " ", + sort_mutations(cmuts[[as.character(sc0$node_number[isc0])]]$all) + ), + width = 60 + ), + pattern = " ", + replacement = ", " + ), + "\n" + ) + }) # end of sapply + + ggobj$data$defmuts <- ttdefmuts + ggobj$data$allmuts <- ttallmuts + if (!is.null(mut_regex)) { + for (mre in mut_regex) { + i <- which(grepl(ggobj$data$allmuts, pattern = mre)) + ggobj$data[[mre]] <- grepl(ggobj$data$allmuts, pattern = mre) + } + } + + # make html widget + ggobj$data$mouseover <- sapply(seq_along(ttdfs), function(i) { + paste0( + "Statistics:\n", ttdfs[i], + "\n\nGeography:\n", ttregtabs[i], + "\n\nCo-circulating with:\n", ttcocirc[i], + "\n\n", ttdefmuts[i], + "\n", ttallmuts[i], + "\n", + collapse = "\n" + ) + }) + ggobj$data$colour_var <- ggobj$data[[branch_col]] + + ggobj +} + #' Adds a heatmap to the right of a ggtree object #' #' @param ggobj A ggtree object. diff --git a/R/treeview.R b/R/treeview.R index 4f5b25f..d9d9cb5 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -246,108 +246,13 @@ treeview <- function(e0, limitsize = FALSE ) - # make mouseover info - ## standard meta data - ttdfs <- apply(gtr1.1$data, 1, FUN = function(x) { - z <- as.list(x) - lgr <- as.numeric(z$logistic_growth_rate) - y <- with( - z, - data.frame( - `Cluster ID` = glue::glue("#{cluster_id}"), - `Cluster size` = cluster_size, - `Date range` = date_range, - `Example sequence` = label, - `Logistic growth` = paste0( - ifelse(lgr > 0, "+", ""), - round(lgr * 100), "%" - ), - `Mol clock outlier` = clock_outlier, - `Lineages` = lineages - ) - ) - y <- t(y) - colnames(y) <- "" - tryCatch(paste(knitr::kable(y, "simple"), collapse = "\n"), - error = function(e) paste(knitr::kable(y, "markdown"), collapse = "\n") - ) - }) - - ## table with geo composition - ttregtabs <- gtr1.1$data$region_summary # - ## cocirc - ttcocirc <- gtr1.1$data$cocirc_summary # - - ## defining muts - ttdefmuts <- sapply(match(gtr1.1$data$cluster_id, sc0$cluster_id), function(isc0) { - if (is.na(isc0)) { - return("") - } - paste( - sep = "\n", - "Cluster branch mutations:", - gsub( - x = tryCatch( - stringr::str_wrap( - paste( - collapse = " ", - sort_mutations(cmuts[[as.character(sc0$node_number[isc0])]]$defining) - ), - width = 60 - ), - error = function(e) browser() - ), - pattern = " ", - replacement = ", " - ), - "\n" - ) - }) # end of sapply - - ttallmuts <- sapply(match(gtr1.1$data$cluster_id, sc0$cluster_id), function(isc0) { - if (is.na(isc0)) { - return("") - } - paste( - sep = "\n", - "All mutations:", - gsub( - x = stringr::str_wrap( - paste( - collapse = " ", - sort_mutations(cmuts[[as.character(sc0$node_number[isc0])]]$all) - ), - width = 60 - ), - pattern = " ", - replacement = ", " - ), - "\n" - ) - }) # end of sapply - - gtr1.1$data$defmuts <- ttdefmuts - gtr1.1$data$allmuts <- ttallmuts - if (!is.null(mut_regex)) { - for (mre in mut_regex) { - i <- which(grepl(gtr1.1$data$allmuts, pattern = mre)) - gtr1.1$data[[mre]] <- grepl(gtr1.1$data$allmuts, pattern = mre) - } - } - - # make html widget - gtr1.1$data$mouseover <- sapply(seq_along(ttdfs), function(i) { - paste0( - "Statistics:\n", ttdfs[i], - "\n\nGeography:\n", ttregtabs[i], - "\n\nCo-circulating with:\n", ttcocirc[i], - "\n\n", ttdefmuts[i], - "\n", ttallmuts[i], - "\n", - collapse = "\n" - ) - }) - gtr1.1$data$colour_var <- gtr1.1$data[[vn]] + gtr1.1 <- append_interactivity_data( + gtr1.1, + branch_col = vn, + sc0 = sc0, + cmuts = cmuts, + mut_regex = mut_regex + ) genotype <- as.data.frame( gtr1.1$data[ diff --git a/man/append_interactivity_data.Rd b/man/append_interactivity_data.Rd new file mode 100644 index 0000000..291ce48 --- /dev/null +++ b/man/append_interactivity_data.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_tree.R +\name{append_interactivity_data} +\alias{append_interactivity_data} +\title{Adds data to a ggtree object to allow mouse-over tooltips etc when presented interactively} +\usage{ +append_interactivity_data(ggobj, branch_col, sc0, cmuts, mut_regex) +} +\arguments{ +\item{ggobj}{A \code{ggtree} object.} + +\item{branch_col}{Scalar string. The name of a column within \code{ggobj$data} defining the +statistic under study here (`logistic_growth_rate`, `clock_outlier`).} + +\item{sc0, cmuts}{Data-frames.} + +\item{mut_regex}{Regular expression. Defines the mutations under study here.} +} +\value{ +A ggtree object. The \code{$data} entry has additional entries (\code{mouseover}, + \code{colour_var}, \code{defmuts}, \code{allmuts}) that are used when presented interactively + by \code{ggiraph}. +} +\description{ +Adds data to a ggtree object to allow mouse-over tooltips etc when presented interactively +} From a94e1da9fa51c01ceeed772660cf181b1377d7ec Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Fri, 4 Nov 2022 12:45:27 +0000 Subject: [PATCH 8/9] refac: function for pulling heatmap genotype data from ggtree object --- R/plot_tree.R | 26 ++++++++++++++++++++++++++ R/treeview.R | 11 ++++------- man/extract_genotype_data.Rd | 24 ++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 man/extract_genotype_data.Rd diff --git a/R/plot_tree.R b/R/plot_tree.R index 4606a98..bf55cc1 100644 --- a/R/plot_tree.R +++ b/R/plot_tree.R @@ -197,6 +197,32 @@ append_interactivity_data <- function(ggobj, ggobj } +#' Extract the data about viral genotypes from a \code{ggtree} object +#' +#' @param ggobj A \code{ggtree} object, as generated by \code{append_interactivity_data}. The +#' \code{data} entry for this object should contain the columns "node", "label" and a column for +#' each of the \code{mut_regex} values. +#' @param n_leaves Scalar integer. The number of leaves in the tree. +#' @param mut_regex String. Regular expression defining the mutations under study here. This +#' should be a subset of the column-names in \code{ggobj$data}. +#' +#' @return Data-frame. + +extract_genotype_data <- function(ggobj, + n_leaves, + mut_regex) { + genotype <- as.data.frame( + ggobj$data[ + ggobj$data$node <= n_leaves, + c("label", mut_regex) + ] + ) + rownames(genotype) <- genotype$label + genotype <- genotype[, -1, drop = FALSE] + + genotype +} + #' Adds a heatmap to the right of a ggtree object #' #' @param ggobj A ggtree object. diff --git a/R/treeview.R b/R/treeview.R index d9d9cb5..c8fab2a 100644 --- a/R/treeview.R +++ b/R/treeview.R @@ -254,14 +254,11 @@ treeview <- function(e0, mut_regex = mut_regex ) - genotype <- as.data.frame( - gtr1.1$data[ - gtr1.1$data$node <= ape::Ntip(tr2), - c("label", mut_regex) - ] + genotype <- extract_genotype_data( + ggobj = gtr1.1, + n_leaves = ape::Ntip(tr2), + mut_regex = mut_regex ) - rownames(genotype) <- genotype$label - genotype <- genotype[, -1, drop = FALSE] gtr1.2 <- append_heatmap( ggobj = gtr1.1, diff --git a/man/extract_genotype_data.Rd b/man/extract_genotype_data.Rd new file mode 100644 index 0000000..99442fc --- /dev/null +++ b/man/extract_genotype_data.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_tree.R +\name{extract_genotype_data} +\alias{extract_genotype_data} +\title{Extract the data about viral genotypes from a \code{ggtree} object} +\usage{ +extract_genotype_data(ggobj, n_leaves, mut_regex) +} +\arguments{ +\item{ggobj}{A \code{ggtree} object, as generated by \code{append_interactivity_data}. The +\code{data} entry for this object should contain the columns "node", "label" and a column for +each of the \code{mut_regex} values.} + +\item{n_leaves}{Scalar integer. The number of leaves in the tree.} + +\item{mut_regex}{String. Regular expression defining the mutations under study here. This +should be a subset of the column-names in \code{ggobj$data}.} +} +\value{ +Data-frame. +} +\description{ +Extract the data about viral genotypes from a \code{ggtree} object +} From 75f0a7548a2f918f395507f7d5f2ebabcca9cc4c Mon Sep 17 00:00:00 2001 From: Russ Hyde Date: Wed, 9 Nov 2022 09:53:29 +0000 Subject: [PATCH 9/9] chore: bump version --- DESCRIPTION | 4 ++-- NEWS.md | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1d8273b..b3c5d21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tfpscanner Title: Transmission fitness polymorphism scanner -Version: 0.1.5 -Date: 2022-11-07 +Version: 0.1.6 +Date: 2022-11-09 Author: Erik Volz, Olivia Boyd Maintainer: Erik Volz Description: A pipeline for scanning a SARS-CoV-2 phylogeny for clades with outlying growth diff --git a/NEWS.md b/NEWS.md index 639415a..3672991 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,11 @@ +# tfpscanner 0.1.6 _2022-11-09_ + +- Split up the `.plot_tree` function into smaller components + # tfpscanner 0.1.5 _2022-11-07_ - Separate the function for plotting `sina` cluster data from the `treeview` function - # tfpscanner 0.1.4 _2022-11-07_ - Add `testthat` and `pre-commit` infrastructure