Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added locate - like focus, but add loci as track instead of zoom; added position_* examples #50

Merged
merged 3 commits into from
Feb 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ export(layout)
export(layout_genomes)
export(layout_seqs)
export(links)
export(locate)
export(pick)
export(pick_by_tree)
export(pick_seqs)
Expand Down
101 changes: 77 additions & 24 deletions R/focus.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#' Focus on features and regions of interest
#' Show features and regions of interest
#'
#' Only show loci containing features of interest. Loci can either be provided
#' Show loci containing features of interest. Loci can either be provided
#' as predefined regions directly (`loci=`), or are constructed automatically
#' based on pre-selected features (via `...`). Features within `max_dist` are
#' greedily combined into the same locus. The, thus, identified loci are
#' extracted from their parent sequences and promoted to new sequences,
#' replacing the original sequence set. The new sequences will have their
#' `locus_id` as their new `seq_id`.
#' greedily combined into the same locus. `locate()` adds these loci as new
#' track so that they can be easily visualized. `focus()` extracts those loci
#' from their parent sequences making them the new sequence set. These sequences
#' will have their `locus_id` as their new `seq_id`.
#' @param x A gggenomes object
#' @param ... Logical predicates defined in terms of the variables in the track
#' given by `.track_id`. Multiple conditions are combined with ‘&’. Only rows
Expand Down Expand Up @@ -34,6 +34,14 @@
#' original binning, but can be set to the "seq" to bin all loci originating
#' from the same parent sequence, or to "locus" to separate all loci into
#' individual bins.
#' @param .locus_score An expression evaluated in the context of all features
#' that are combined into a new locus. Results are stored in the column
#' `locus_score`. Defaults to the `n()`, i.e. the number of features per
#' locus. Set, for example, to `sum(bitscore)` to sum over all blast hit
#' bitscore of per locus. Usually used in conjunction with `.locus_filter`.
#' @param .locus_filter An predicate expression used to post-filter identified
#' loci. Set `.locus_filter=locus_score >= 3` to only return loci comprising
#' at least 3 target features.
#' @param .loci A data.frame specifying loci directly. Required columns are
#' `seq_id,start,end`. Supersedes `...`.
#' @export
Expand All @@ -44,8 +52,7 @@
#' s0 <- read_seqs(ex("gorg/gorg.fna"))
#' s1 <- s0 %>%
#' # strip trailing number from contigs to get bins
#' mutate(bin_id = str_remove(seq_id, "_\\d+$")) %>%
#' slice_head(n=200) # we only need a few contigs for this
#' mutate(bin_id = str_remove(seq_id, "_\\d+$"))
#' # gene annotations from prokka
#' g0 <- read_feats(ex("gorg/gorg.gff"))
#'
Expand All @@ -67,6 +74,15 @@
#' scale_color_brewer(palette="Dark2") +
#' geom_point(aes(x=x,y=y, color=system), data=feats())
#'
#' # hilight the regions containing hits
#' gggenomes(g0, s1, f1, wrap=2e5) %>%
#' locate(.track_id = feats) %>%
#' identity() +
#' geom_seq() + geom_bin_label() +
#' scale_color_brewer(palette="Dark2") +
#' geom_feat(data=feats(loci), color="plum3") +
#' geom_point(aes(x=x,y=y, color=system), data=feats())
#'
#' # zoom in on loci
#' gggenomes(g0, s1, f1, wrap=5e4) %>%
#' focus(.track_id = feats) +
Expand All @@ -75,7 +91,7 @@
#' geom_feat(aes(color=system)) +
#' geom_feat_tag(aes(label=gene)) +
#' scale_color_brewer(palette="Dark2")
#'
#' @describeIn focus Identify regions of interest and zoom in on them
focus <- function(x, ..., .track_id=2, .max_dist = 10e3, .expand=5e3,
.overhang=c("drop", "trim", "keep"),
.locus_id=str_glue("{seq_id}_lc{row_number()}"), .locus_id_group = seq_id,
Expand All @@ -87,25 +103,17 @@ focus <- function(x, ..., .track_id=2, .max_dist = 10e3, .expand=5e3,

# construct loci from predicate hits
if(is.null(.loci)){
targets <- pull_track(x, {{.track_id}}, ...)
if(nrow(targets) <1)
abort("Found no targets to focus on")
loci <- targets %>%
compute_loci(max_dist=.max_dist, locus_score={{.locus_score}}, locus_filter={{.locus_filter}},
locus_id={{.locus_id}}, locus_id_group={{.locus_id_group}}) %>%
arrange(locus_id) %>%
mutate(
start = start - .expand[1],
end=end + .expand[2],
locus_length = width(start, end)
)
loci <- locate_impl(x, ..., .track_id={{.track_id}}, .max_dist=.max_dist,
.expand=.expand, .locus_id={{.locus_id}},
.locus_id_group={{.locus_id_group}}, .locus_bin={{.locus_bin}},
.locus_score={{.locus_score}}, .locus_filter={{.locus_filter}})
}else{
# coerce IDs to chars, so we don't get errors in join by mismatched types
loci <- mutate(.loci, seq_id = as.character(seq_id))
if(!has_name("locus_id")){
print("hi")
loci <- group_by({{ locus_id_group }}) %>%
mutate(loci, locus_id = str_glue(locus_id)) %>%
loci <- group_by({{ .locus_id_group }}) %>%
mutate(loci, locus_id = str_glue(.locus_id)) %>%
ungroup
}
}
Expand Down Expand Up @@ -138,7 +146,7 @@ focus <- function(x, ..., .track_id=2, .max_dist = 10e3, .expand=5e3,
x$data$orig_links <- map(x$data$orig_links, focus_links, s, bin_id)

# rename seqs/bins to loci
s <- mutate(s, bin_id = .data[[bin_id]], seq_id = locus_id)
s <- mutate(s, bin_id = .data[[bin_id]], orig_seq_id = seq_id, seq_id = locus_id)

if(FALSE){#any(duplicated(s$seq_id))){
# NOTE: currently, this would create a clone of the sequence - duplicating
Expand All @@ -155,6 +163,51 @@ focus <- function(x, ..., .track_id=2, .max_dist = 10e3, .expand=5e3,
layout(x, args_feats = list(marginal = marginal))
}

#' @export
#' @param .locus_track The name of the new track containing the identified loci.
#' @describeIn focus Identify regions of interest and add them as new feature track
locate <- function(x, ..., .track_id=2, .max_dist = 10e3, .expand=5e3,
.locus_id=str_glue("{seq_id}_lc{row_number()}"), .locus_id_group = seq_id,
.locus_bin = c("bin", "seq", "locus"),
.locus_score=n(), .locus_filter=TRUE, .locus_track="loci"){

loci <- locate_impl(x, ..., .track_id={{.track_id}}, .max_dist=.max_dist,
.expand=.expand, .locus_id={{.locus_id}},
.locus_id_group={{.locus_id_group}}, .locus_bin={{.locus_bin}},
.locus_score={{.locus_score}}, .locus_filter={{.locus_filter}})

# odd construct to name an argument in a call based on a variable
# essentially doing: add_feats(x, !!.locus_track=loci) - which doesn't work
inform(str_glue(
"Adding '{.locus_track}' track. Plot with `geom_feat(data=feats({.locus_track}))`"))
args <- set_names(list(loci), .locus_track);
exec(add_feats, x, !!!args)
}

locate_impl <- function(x, ..., .track_id=2, .max_dist = 10e3, .expand=5e3,
.locus_id=str_glue("{seq_id}_lc{row_number()}"), .locus_id_group = seq_id,
.locus_bin = c("bin", "seq", "locus"),
.locus_score=n(), .locus_filter=TRUE, .loci=NULL){
if(length(.expand==1)) .expand <- c(.expand,.expand)
bin_id <- paste0(match.arg(.locus_bin), "_id")

targets <- pull_track(x, {{.track_id}}, ...)
if(nrow(targets) <1)
abort("Found no targets to build loci from")

loci <- targets %>%
compute_loci(max_dist=.max_dist, locus_score={{.locus_score}}, locus_filter={{.locus_filter}},
locus_id={{.locus_id}}, locus_id_group={{.locus_id_group}}) %>%
arrange(locus_id) %>%
mutate(
start = start - .expand[1],
end=end + .expand[2],
locus_length = width(start, end)
)
loci
}


focus_feats <- function(track, seqs, bin_id){
# !! is loci - rest is feature
track <- add_feat_focus_scaffold(track, seqs)
Expand Down
26 changes: 24 additions & 2 deletions R/position_strandpile.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,28 @@
#' lowest stack level on the sequence, 1 to put forward/reverse sequence one
#' half offset above/below the sequence line.
#' @export
#' @examples
#' library(patchwork)
#' p <- gggenomes(emale_genes) %>%
#' pick(3:4) + geom_seq()
#'
#' f0 <- tibble(
#' seq_id = pull_seqs(p)$seq_id[1],
#' start = 1:20*1000,
#' end = start + 2500,
#' strand = rep(c("+", "-"), length(start)/2)
#' )
#'
#' sixframe <- function(x, strand) as.character((x%%3 + 1) * strand_int(strand))
#'
#' p1 <- p + geom_gene()
#' p2 <- p + geom_gene(aes(fill=strand), position="strand")
#' p3 <- p + geom_gene(aes(fill=strand), position=position_strand(flip=TRUE, base=0.2))
#' p4 <- p + geom_gene(aes(fill=sixframe(x,strand)), position="sixframe")
#' p5 <- p %>% add_feats(f0) + geom_gene() + geom_feat(aes(color=strand))
#' p6 <- p %>% add_feats(f0) + geom_gene() + geom_feat(aes(color=strand), position="strandpile")
#' p1 + p2 + p3 + p4 + p5 + p6 + plot_layout(ncol=3, guides="collect") & ylim(2.5,0.5)
#'
position_strand <- function(offset = 0.1, flip = FALSE, grouped = NULL,
base = offset/2){
ggproto(NULL, PositionStrand, offset = offset, flip = flip,
Expand All @@ -27,7 +49,7 @@ position_strand <- function(offset = 0.1, flip = FALSE, grouped = NULL,
#' @rdname position_strand
#' @export
position_pile <- function(offset = 0.1, gap=1, flip = FALSE,
grouped = NULL, base = offset){
grouped = NULL, base = 0){
ggproto(NULL, PositionPile, offset = offset, gap = gap,
flip = flip, grouped = grouped, base = base)
}
Expand Down Expand Up @@ -124,7 +146,7 @@ PositionStrandpile <- ggproto("PositionStrandpile", Position,
#' @format NULL
#' @usage NULL
#' @export
PositionPile <- ggproto("PositionPile", PositionStrandpile, strandwise = FALSE, base = 0.1)
PositionPile <- ggproto("PositionPile", PositionStrandpile, strandwise = FALSE, base = 0)
#' @rdname position_strand
#' @format NULL
#' @usage NULL
Expand Down
Loading