diff --git a/.Rbuildignore b/.Rbuildignore index 045d5c5..133bd38 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,5 @@ ^vignettes/pkgdown$ ^\.github$ ^codecov\.yml$ +^doc$ +^Meta$ diff --git a/DESCRIPTION b/DESCRIPTION index 4e9c5dc..a8d0290 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ragp Type: Package Title: Mining for Hydroxyproline rich glycoprotein sequences -Version: 0.3.5 +Version: 0.3.5.9000 Authors@R: c( person("Milan", "Dragicevic", email = "mdragicevic@ibiss.bg.ac.rs", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9422-2952")), diff --git a/NAMESPACE b/NAMESPACE index 3e54b6f..16b43f4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -89,7 +89,6 @@ export(get_tmhmm) export(maab) export(pfam2go) export(plot_prot) -export(plot_signalp) export(predict_hyp) export(scan_ag) export(scan_nglc) diff --git a/NEWS.md b/NEWS.md index 1ff6a33..25d6998 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,31 @@ + +ragp 0.3.5.9000 +=============== + +Bug Fixes and Improvements +-------------------------- + +* `plot_signalp()` has been deprecated and removed. + +* `get_signalp5()` now works with https://services.healthtech.dtu.dk/service.php?SignalP-5.0 site since the old link http://www.cbs.dtu.dk/services/SignalP/ does not function any longer. + +* `get_signalp()` now works with https://services.healthtech.dtu.dk/service.php?SignalP-4.1 since the old link http://www.cbs.dtu.dk/services/SignalP-4.1/ does not function any longer. + +* `get_signalp()` now runs one job at a time. + +* `get_signalp()` splitter argument default value has been changed to 1000. + +* `get_signalp()` sleep argument has been removed. + +* `get_targetp()` now works with https://services.healthtech.dtu.dk/service.php?TargetP-1.1 since the old link http://www.cbs.dtu.dk/services/TargetP-1.1/ does not function any longer. + +* `get_targetp()` now runs one job at a time. + +* `get_targetp()` splitter argument default value has been changed to 1000. 
+ +* `get_targetp()` sleep argument has been removed. + + ragp 0.3.5 =============== diff --git a/R/get_signalp.R b/R/get_signalp.R index 75f23dc..8f95e09 100644 --- a/R/get_signalp.R +++ b/R/get_signalp.R @@ -13,8 +13,7 @@ #' @param method One of c("best", "notm"), defaults to "best". Signalp 4.1 contains two types of neural networks. SignalP-TM has been trained with sequences containing transmembrane segments in the data set, while SignalP-noTM has been trained without those sequences. Per default, SignalP 4.1 uses SignalP-TM as a preprocessor to determine whether to use SignalP-TM or SignalP-noTM in the final prediction (if 4 or more positions are predicted to be in a transmembrane state, SignalP-TM is used, otherwise SignalP-noTM). An exception is Gram-positive bacteria, where SignalP-TM is used always. If you are confident that there are no transmembrane segments in your data, you can get a slightly better performance by choosing "Input sequences do not include TM regions", which will tell SignalP 4.1 to use SignalP-noTM always. #' @param minlen An integer value corresponding to the minimal predicted signal peptide length, at default set to 10. SignalP 4.0 could, in rare cases, erroneously predict signal peptides shorter than 10 residues. These errors have in SignalP 4.1 been eliminated by imposing a lower limit on the cleavage site position (signal peptide length). The minimum length is by default 10, but you can adjust it. Signal peptides shorter than 15 residues are very rare. If you want to disable this length restriction completely, enter 0 (zero). #' @param trunc An integer value corresponding to the N-terminal truncation of input sequence, at default set to 70. By default, the predictor truncates each sequence to max. 70 residues before submitting it to the neural networks. If you want to predict extremely long signal peptides, you can try a higher value, or disable truncation completely by entering 0 (zero). 
-#' @param splitter An integer indicating the number of sequences to be in each .fasta file that is to be sent to the server. Defaults to 500. Change only in case of a server side error. Accepted values are in range of 1 to 2000. -#' @param sleep A numeric indicating the pause in seconds between POST and GET server calls, at default set to 1s. Decreasing is not recommended. +#' @param splitter An integer indicating the number of sequences to be in each .fasta file that is to be sent to the server. Default is 1000. Change only in case of a server side error. Accepted values are in range of 1 to 2000. #' @param attempts Integer, number of attempts if server unresponsive, at default set to 2. #' @param progress Boolean, whether to show the progress bar, at default set to FALSE. #' @param ... currently no additional arguments are accepted apart the ones documented bellow. @@ -42,7 +41,7 @@ #' @source \url{https://services.healthtech.dtu.dk/service.php?SignalP-4.1} #' @references Petersen TN. Brunak S. Heijne G. Nielsen H. (2011) SignalP 4.0: discriminating signal peptides from transmembrane regions. Nature Methods 8: 785-786 #' -#' @seealso \code{\link[ragp]{get_phobius}} \code{\link[ragp]{get_targetp}} +#' @seealso \code{\link[ragp]{get_signalp5}} \code{\link[ragp]{get_phobius}} \code{\link[ragp]{get_targetp}} #' #' @examples #' library(ragp) @@ -73,17 +72,16 @@ get_signalp.character <- function(data, method = c("best", "notm"), minlen = NULL, trunc = 70L, - splitter = 500L, - sleep = 3, + splitter = 1000L, attempts = 2, progress = FALSE, ...){ if (missing(splitter)) { - splitter <- 500L + splitter <- 1000L } if (length(splitter) > 1){ - splitter <- 500L - warning("splitter should be of length 1, setting to default: splitter = 500", + splitter <- 1000L + warning("splitter should be of length 1, setting to default: splitter = 1000", call. = FALSE) } if (!is.numeric(splitter)){ @@ -92,16 +90,16 @@ get_signalp.character <- function(data, call. 
= FALSE) } if (is.na(splitter)){ - splitter <- 500L - warning("splitter was set to NA, setting to default: splitter = 500", + splitter <- 1000L + warning("splitter was set to NA, setting to default: splitter = 1000", call. = FALSE) } if (is.numeric(splitter)) { splitter <- floor(splitter) } if (!(splitter %in% 1:2000)) { - splitter <- 500L - warning("Illegal splitter input, splitter will be set to 500", + splitter <- 1000L + warning("Illegal splitter input, splitter will be set to 1000", call. = FALSE) } if (!missing(trunc)){ @@ -169,28 +167,6 @@ get_signalp.character <- function(data, warning("progress was set to NA, setting to default: progress = FALSE", call. = FALSE) } - if (missing(sleep)) { - sleep <- 3 - } - if (length(sleep) > 1){ - sleep <- 3 - warning("sleep should be of length 1, setting to default: sleep = 3", - call. = FALSE) - } - if (!is.numeric(sleep)){ - sleep <- as.numeric(sleep) - warning("sleep is not numeric, converting using 'as.numeric'", - call. = FALSE) - } - if (is.na(sleep)){ - sleep <- 3 - warning("sleep was set to NA, setting to default: sleep = 3", - call. = FALSE) - } - if (sleep < 2){ - warning("setting sleep to less than 2s can cause problems when fetching results from the server", - call. = FALSE) - } if (missing(org_type)) { org_type <- "euk" } @@ -293,8 +269,8 @@ get_signalp.character <- function(data, stop("cannot find file in the specified path", call. 
= FALSE) } - url <- "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi" - cfg_file <- "/usr/opt/www/pub/CBS/services/SignalP-4.1/SignalP.cf" + url <- "https://services.healthtech.dtu.dk/cgi-bin/webface2.fcgi" + cfg_file <- "/var/www/html/services/SignalP-4.1/webface.cf" file_list <- ragp::split_fasta(path_in = file_name, path_out = "tmp_signalp_", num_seq = splitter, @@ -308,75 +284,57 @@ get_signalp.character <- function(data, max = for_pb, style = 3) } - splt <- (seq_along(file_list) - 1) %/% 10 - file_list <- split(file_list, - splt) - output <- vector("list", length(file_list)*10) + output <- vector("list", length(file_list)) for(k in seq_along(file_list)){ x <- file_list[[k]] - jobid <- vector("character", 10) - for (i in seq_along(x)) { - file_up <- httr::upload_file(x[i]) - if (trunc == 1000000L){ - trunc <- "" - } - res <- httr::POST(url = url, - encode = "multipart", - body = list(configfile = cfg_file, - SEQSUB = file_up, - orgtype = org_type, - `Dcut-type` = Dcut_type, - `Dcut-noTM` = Dcut_noTM, - `Dcut-TM` = Dcut_TM, - graphmode = NULL, - format = "short", - minlen = minlen, - method = method, - trunc = as.character(trunc))) - if(!grepl("jobid=", res$url)){ - stop("something went wrong on server side") - } - res <- sub("http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?jobid=", - "", - res$url, - fixed = TRUE) - - res <- sub("&wait=20", - "", - res, - fixed = TRUE) - jobid[i] <- res - if(progress){ - utils::setTxtProgressBar(pb, - floor(i/2) + (10 * (k - 1))) - } - #Sys.sleep(sleep) + + file_up <- httr::upload_file(x) + if (trunc == 1000000L){ + trunc <- "" } - collected_res <- vector("list", length(x)) - for (i in seq_along(x)) { - time1 <- Sys.time() - repeat { - res2 <- httr::GET(url = url, - query = list(jobid = jobid[i], - wait = "20")) - bad <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - "//head") - ) - if (grepl("Illegal", bad)) { - prt <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - 
"//li") - ) - stop(paste0(prt, ". Problem in file: ", "temp_", - i, ".fa"), - call. = FALSE) - } + res <- httr::POST(url = url, + encode = "multipart", + body = list(configfile = cfg_file, + SEQSUB = file_up, + orgtype = org_type, + `Dcut-type` = Dcut_type, + `Dcut-noTM` = Dcut_noTM, + `Dcut-TM` = Dcut_TM, + graphmode = NULL, + format = "short", + minlen = minlen, + method = method, + trunc = as.character(trunc))) + if(!grepl("jobid=", res$url)){ + stop("something went wrong on server side") + } + + res <- sub("https://services.healthtech.dtu.dk/cgi-bin/webface2.cgi?jobid=", + "", + res$url, + fixed = TRUE) + + res <- sub("&wait=20", + "", + res, + fixed = TRUE) + + jobid <- res + + time1 <- Sys.time() + + repeat { + res2 <- httr::GET(url = url, + query = list(jobid = jobid, + wait = "20")) + code <- res2$status_code + + if(code != 200){ + res2_split <- NULL + warning(paste0(". Problem in file: ", + x)) + } else { res2 <- as.character( xml2::xml_find_all( httr::content(res2, @@ -387,85 +345,77 @@ get_signalp.character <- function(data, strsplit(res2, "\n") ) - Sys.sleep(1) - if (any(grepl("Cmax", res2_split))) { - break + } + Sys.sleep(2) + + if (any(grepl("Cmax", res2_split))) { + break + } + + time2 <- Sys.time() + + max.time <- as.difftime(pmax(100, splitter * 1.5), + units = "secs") + + if ((time2 - time1) > max.time) { + res2_split <- NULL + if(progress) message( + "file", + x, + "took longer then expected") + break + } + } + if (is.null(res2_split)) { + tms <- 0 + + while(tms < attempts && is.null(res2_split)){ + if(progress) message( + "reattempting file", + x) + + file_up <- httr::upload_file(x) + + res <- httr::POST(url = url, + encode = "multipart", + body = list(configfile = cfg_file, + SEQSUB = file_up, + orgtype = org_type, + `Dcut-type` = Dcut_type, + `Dcut-noTM` = Dcut_noTM, + `Dcut-TM` = Dcut_TM, + graphmode = NULL, + format = "short", + minlen = minlen, + method = method, + trunc = as.character(trunc))) + if(!grepl("jobid=", res$url)){ + 
stop("something went wrong on server side") } + res <- sub("https://services.healthtech.dtu.dk/cgi-bin/webface2.cgi?jobid=", + "", + res$url, + fixed = TRUE) - time2 <- Sys.time() + res <- sub("&wait=20", + "", + res, + fixed = TRUE) + jobid <- res - max.time <- as.difftime(pmax(50, splitter), - units = "secs") + time1 <- Sys.time() - if ((time2 - time1) > max.time) { - res2_split <- NULL - if(progress) message( - "file", - x[i], - "took longer then expected") - break - } - } - if (is.null(res2_split)) { - tms <- 0 - while(tms < attempts && is.null(res2_split)){ - if(progress) message( - "reattempting file", - x[i]) - file_up <- httr::upload_file(x[i]) - res <- httr::POST(url = url, - encode = "multipart", - body = list(configfile = cfg_file, - SEQSUB = file_up, - orgtype = org_type, - `Dcut-type` = Dcut_type, - `Dcut-noTM` = Dcut_noTM, - `Dcut-TM` = Dcut_TM, - graphmode = NULL, - format = "short", - minlen = minlen, - method = method, - trunc = as.character(trunc))) - if(!grepl("jobid=", res$url)){ - stop("something went wrong on server side") - } - res <- sub("http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?jobid=", - "", - res$url, - fixed = TRUE) - - res <- sub("&wait=20", - "", - res, - fixed = TRUE) - jobidi <- res + repeat { + res2 <- httr::GET(url = url, + query = list(jobid = jobid, + wait = "20")) + code <- res2$status_code - time1 <- Sys.time() - - repeat { - res2 <- httr::GET(url = url, - query = list(jobid = jobidi, - wait = "20")) - bad <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - "//head") - ) - if (grepl("Illegal", bad)) { - prt <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - "//li") - ) - stop(paste0(prt, - ". Problem in file: ", - "temp_", - i, - ".fa"), - call. = FALSE) - } + if(code != 200){ + res2_split <- NULL + warning(paste0( ". 
Problem in file: ", + x)) + } else { res2 <- as.character( xml2::xml_find_all( httr::content(res2, @@ -476,85 +426,87 @@ get_signalp.character <- function(data, strsplit(res2, "\n") ) - Sys.sleep(1) - if (any(grepl("Cmax", res2_split))) { - break - } - - time2 <- Sys.time() - - max.time <- as.difftime(pmax(100, splitter * 1.5), - units = "secs") - - if ((time2 - time1) > max.time) { - res2_split <- NULL - break - } } - tms <- tms + 1 - } - } - if (is.null(res2_split)){ - output <- do.call(rbind, - output) - output$is.signalp <- output$is.sp == "Y" - if(progress){ - utils::setTxtProgressBar(pb, - for_pb) - close(pb) + Sys.sleep(1) + if (any(grepl("Cmax", res2_split))) { + break + } + + time2 <- Sys.time() + + max.time <- as.difftime(pmax(100, splitter * 1.5), + units = "secs") + + if ((time2 - time1) > max.time) { + res2_split <- NULL + break + } } - warning( - "maximum attempts reached at", - x[i], - "returning finished queries", - call. = FALSE) - return(output) + tms <- tms + 1 } - unlink(x[i]) - - res2_split <- res2_split[(which(grepl("name", - res2_split))[1] + - 1):(which(grepl("/pre", - res2_split ))[1] - 1)] - - if(any(grepl("hr", res2_split))){ - res2_split <- res2_split[1:(which(grepl("
", - res2_split))[1] - 1)] - } - res2_split <- strsplit(res2_split, - " +") - res2_split <- do.call(rbind, - res2_split) - res2_split <- as.data.frame(res2_split, - stringsAsFactors = F) - colnames(res2_split) <- c("id", - "Cmax", - "Cmax.pos", - "Ymax", - "Ymax.pos", - "Smax", - "Smax.pos", - "Smean", - "Dmean", - "is.sp", - "Dmaxcut", - "Networks.used") - res2_split$Ymax.pos <- as.integer(as.character(res2_split$Ymax.pos)) - res2_split$Cmax.pos <- as.integer(as.character(res2_split$Cmax.pos)) - res2_split$Smax.pos <- as.integer(as.character(res2_split$Smax.pos)) - res2_split$Cmax <- as.numeric(as.character(res2_split$Cmax)) - res2_split$Ymax <- as.numeric(as.character(res2_split$Ymax)) - res2_split$Smax <- as.numeric(as.character(res2_split$Smax)) - res2_split$Smean <- as.numeric(as.character(res2_split$Smean)) - res2_split$Dmean <- as.numeric(as.character(res2_split$Dmean)) - + } + + if (is.null(res2_split)){ + output <- do.call(rbind, + output) + output$is.signalp <- output$is.sp == "Y" if(progress){ utils::setTxtProgressBar(pb, - floor(i/2) + 5 + (10 * (k - 1))) + for_pb) + close(pb) } - output[[((k*10)-10)+i]] <- res2_split + warning( + "maximum attempts reached at", + x, + "returning finished queries", + call. = FALSE) + return(output) + } + unlink(x) + + res2_split <- res2_split[(which(grepl("name", + res2_split))[1] + + 1):(which(grepl("/pre", + res2_split ))[1] - 1)] + + if(any(grepl("hr", res2_split))){ + res2_split <- res2_split[1:(which(grepl("
", + res2_split))[1] - 1)] + } + res2_split <- strsplit(res2_split, + " +") + res2_split <- do.call(rbind, + res2_split) + res2_split <- as.data.frame(res2_split, + stringsAsFactors = F) + colnames(res2_split) <- c("id", + "Cmax", + "Cmax.pos", + "Ymax", + "Ymax.pos", + "Smax", + "Smax.pos", + "Smean", + "Dmean", + "is.sp", + "Dmaxcut", + "Networks.used") + res2_split$Ymax.pos <- as.integer(as.character(res2_split$Ymax.pos)) + res2_split$Cmax.pos <- as.integer(as.character(res2_split$Cmax.pos)) + res2_split$Smax.pos <- as.integer(as.character(res2_split$Smax.pos)) + res2_split$Cmax <- as.numeric(as.character(res2_split$Cmax)) + res2_split$Ymax <- as.numeric(as.character(res2_split$Ymax)) + res2_split$Smax <- as.numeric(as.character(res2_split$Smax)) + res2_split$Smean <- as.numeric(as.character(res2_split$Smean)) + res2_split$Dmean <- as.numeric(as.character(res2_split$Dmean)) + + if(progress){ + utils::setTxtProgressBar(pb, + k) } + output[[k]] <- res2_split } + if(progress){ utils::setTxtProgressBar(pb, for_pb) diff --git a/R/get_signalp5.R b/R/get_signalp5.R index d31e424..ab748a7 100644 --- a/R/get_signalp5.R +++ b/R/get_signalp5.R @@ -36,7 +36,7 @@ #' @note This function creates temporary files in the working directory. If something goes wrong during communication with the server and progress was set to TRUE, predictions can be obtained using `file.path("http://www.cbs.dtu.dk/services/SignalP-5.0/tmp", jobid, "output_protein_type.txt")` eg `read.delim(file.path(...), header = TRUE, skip = 1)`. #' #' -#' @source \url{http://www.cbs.dtu.dk/services/SignalP/} +#' @source \url{https://services.healthtech.dtu.dk/service.php?SignalP-5.0} #' @references Almagro Armenteros JJ, Tsirigos KD, Sønderby CK, Petersen TN, Winther O, Brunak S, von Heijne G, Nielsen H. (2019) SignalP 5.0 improves signal peptide predictions using deep neural networks. 
Nature Biotechnology, 37:420-423, doi:10.1038/s41587-019-0036-z # #' @seealso \code{\link[ragp]{get_signalp}} \code{\link[ragp]{get_targetp}} @@ -167,8 +167,8 @@ get_signalp5.character <- function(data, organism <- organism[names(organism) == org_type] - url <- "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi" - cfg_file <- "/usr/opt/www/pub/CBS/services/SignalP-5.0/signalp5.cf" + url <- "https://services.healthtech.dtu.dk/cgi-bin/webface2.fcgi" + cfg_file <- "/var/www/html/services/SignalP-5.0/webface.cf" file_list <- ragp::split_fasta(path_in = file_name, path_out = "tmp_signalp5_", num_seq = splitter) @@ -193,7 +193,7 @@ get_signalp5.character <- function(data, } - res <- sub("http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?jobid=", + res <- sub("https://services.healthtech.dtu.dk/cgi-bin/webface2.cgi?jobid=", "", res$url, fixed = TRUE) @@ -212,7 +212,7 @@ get_signalp5.character <- function(data, time1 <- Sys.time() repeat { - res2 <- httr::GET(url = file.path("http://www.cbs.dtu.dk/services/SignalP-5.0/tmp", + res2 <- httr::GET(url = file.path("https://services.healthtech.dtu.dk/services/SignalP-5.0/tmp", jobid, "output_protein_type.txt")) code <- res2$status_code @@ -220,7 +220,7 @@ get_signalp5.character <- function(data, Sys.sleep(5) if (code == 200L) { res2 <- read.delim( - file.path("http://www.cbs.dtu.dk/services/SignalP-5.0/tmp", + file.path("https://services.healthtech.dtu.dk/services/SignalP-5.0/tmp", jobid, "output_protein_type.txt"), header = TRUE, @@ -258,7 +258,7 @@ get_signalp5.character <- function(data, stop("something went wrong on server side") } - res <- sub("http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?jobid=", + res <- sub("https://services.healthtech.dtu.dk/cgi-bin/webface2.cgi?jobid=", "", res$url, fixed = TRUE) @@ -276,14 +276,14 @@ get_signalp5.character <- function(data, time1 <- Sys.time() repeat { - res2 <- httr::GET(url = file.path("http://www.cbs.dtu.dk/services/SignalP-5.0/tmp", + res2 <- httr::GET(url = 
file.path("https://services.healthtech.dtu.dk/services/SignalP-5.0/tmp", jobid, "output_protein_type.txt")) code <- res2$status_code Sys.sleep(5) if (code == 200L) { res2 <- read.delim( - file.path("http://www.cbs.dtu.dk/services/SignalP-5.0/tmp", + file.path("https://services.healthtech.dtu.dk/services/SignalP-5.0/tmp", jobid, "output_protein_type.txt"), header = TRUE, diff --git a/R/get_targetp.R b/R/get_targetp.R index c34abb1..dc86285 100644 --- a/R/get_targetp.R +++ b/R/get_targetp.R @@ -12,8 +12,7 @@ #' @param pcut A numeric value, with range 0 - 1, defaults to 0 (cutoff = "winner_takes_all"). cTP user specified cutoff. #' @param scut A numeric value, with range 0 - 1, defaults to 0 (cutoff = "winner_takes_all"). SP user specified cutoff. #' @param ocut A numeric value, with range 0 - 1, defaults to 0 (cutoff = "winner_takes_all"). User specified cutoff for "other" (not with mTP, cTP, SP). -#' @param splitter An integer indicating the number of sequences to be in each .fasta file that is to be sent to the server. Defaults to 200. Change only in case of a server side error. Accepted values are in range of 1 to 2000. -#' @param sleep A numeric indicating the pause in seconds between server calls, at default set to 1 +#' @param splitter An integer indicating the number of sequences to be in each .fasta file that is to be sent to the server. Defaults to 1000. Change only in case of a server side error. Accepted values are in range of 1 to 2000. #' @param attempts Integer, number of attempts if server unresponsive, at default set to 2. #' @param progress Boolean, whether to show the progress bar, at default set to FALSE. #' @param ... currently no additional arguments are accepted apart the ones documented bellow. @@ -37,7 +36,7 @@ #' @source \url{https://services.healthtech.dtu.dk/service.php?TargetP-1.1} #' @references Emanuelsson O, Nielsen H, Brunak S,von Heijne G. 
(2000) Predicting subcellular localization of proteins based on their N-terminal amino acid sequence. J. Mol. Biol.300: 1005-1016 #' -#' @seealso \code{\link[ragp]{get_signalp}} +#' @seealso \code{\link[ragp]{get_signalp}} \code{\link[ragp]{get_signalp5}} #' #' @examples #' library(ragp) @@ -68,8 +67,7 @@ get_targetp.character <- function(data, pcut = NULL, scut = NULL, ocut = NULL, - splitter = 200, - sleep = 3, + splitter = 1000, attempts = 2, progress = FALSE, ...){ @@ -96,34 +94,12 @@ get_targetp.character <- function(data, if (missing(ocut)){ ocut <- 0 } - if (missing(sleep)){ - sleep <- 3 - } - if (length(sleep) > 1){ - sleep <- 3 - warning("sleep should be of length 1, setting to default: sleep = 3", - call. = FALSE) - } - if (!is.numeric(sleep)){ - sleep <- as.numeric(sleep) - warning("sleep is not numeric, converting using 'as.numeric'", - call. = FALSE) - } - if (is.na(sleep)){ - sleep <- 3 - warning("sleep was set to NA, setting to default: sleep = 3", - call. = FALSE) - } - if (sleep < 2){ - warning("setting sleep to less than 2s can cause problems when fetching results from the server", - call. = FALSE) - } if (missing(splitter)){ - splitter <- 200 + splitter <- 1000 } if (length(splitter) > 1){ - splitter <- 200 - warning("splitter should be of length 1, setting to default: splitter = 200", + splitter <- 1000 + warning("splitter should be of length 1, setting to default: splitter = 1000", call. = FALSE) } if (!is.numeric(splitter)){ @@ -132,16 +108,16 @@ get_targetp.character <- function(data, call. = FALSE) } if (is.na(splitter)){ - splitter <- 200 - warning("splitter was set to NA, setting to default: splitter = 200", + splitter <- 1000 + warning("splitter was set to NA, setting to default: splitter = 1000", call. 
= FALSE) } if (is.numeric(splitter)) { splitter <- floor(splitter) } if (!(splitter %in% 1:2000)){ - splitter <- 200 - warning("Illegal splitter input, splitter will be set to 200", + splitter <- 1000 + warning("Illegal splitter input, splitter will be set to 1000", call. = FALSE) } if (length(attempts) > 1){ @@ -384,82 +360,69 @@ get_targetp.character <- function(data, max = for_pb, style = 3) } - splt <- (seq_along(file_list) - 1) %/% 10 - file_list <- split(file_list, - splt) - output <- vector("list", length(file_list)*10) + + output <- vector("list", length(file_list)) + url <- "https://services.healthtech.dtu.dk/cgi-bin/webface2.fcgi" + cfg <- "/var/www/html/services/TargetP-1.1/webface.cf" for(k in seq_along(file_list)){ x <- file_list[[k]] - jobid <- vector("character", 10) - for (i in seq_along(x)){ - file_up <- httr::upload_file(x[i]) - res <- httr::POST( - url = "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?", - encode = "multipart", - body = list( - `configfile` = "/usr/opt/www/pub/CBS/services/TargetP-1.1/TargetP.cf", - `SEQSUB` = file_up, - `orgtype` = org_type, - `cleavsite` = "on", - `spec` = spec, - `tcut` = tcut, - `pcut` = pcut, - `scut` = scut, - `ocut` = ocut - )) + file_up <- httr::upload_file(x) + + res <- httr::POST( + url = url, + encode = "multipart", + body = list( + `configfile` = cfg, + `SEQSUB` = file_up, + `orgtype` = org_type, + `cleavsite` = "on", + `spec` = spec, + `tcut` = tcut, + `pcut` = pcut, + `scut` = scut, + `ocut` = ocut + )) + + if(!grepl("jobid=", res$url)){ + stop("something went wrong on server side") + } + + res <- sub("https://services.healthtech.dtu.dk/cgi-bin/webface2.cgi?jobid=", + "", + res$url, + fixed = TRUE) + + res <- sub("&wait=20", + "", + res, + fixed = TRUE) + + jobid <- res + + if(progress){ + utils::setTxtProgressBar(pb, + k) + } - if(!grepl("jobid=", res$url)){ - stop("something went wrong on server side") - } - res <- sub("http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?jobid=", - "", - res$url, - fixed = 
TRUE) + Sys.sleep(2) + + time1 <- Sys.time() + + repeat { + res2 <- httr::GET( + url = url, + query = list( + jobid = jobid, + wait = "20" + )) - res <- sub("&wait=20", - "", - res, - fixed = TRUE) - jobid[i] <- res + code <- res2$status_code - if(progress){ - utils::setTxtProgressBar(pb, - floor(i/2) + (10 * (k - 1))) - } - Sys.sleep(sleep) - } - collected_res <- vector("list", - length(jobid)) - - for (i in seq_along(x)){ - time1 <- Sys.time() - repeat { - res2 <- httr::GET( - url = "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?", - query = list( - jobid = jobid[i], - wait = "20" - )) - bad <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - "//head") - ) - if (grepl("Illegal", bad)){ - prt <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - "//li") - ) - stop(paste0(prt, - ". Problem in file: ", - "temp_", - i, - ".fa"), - call. = FALSE) - } + if(code != 200){ + res2_split <- NULL + warning(paste0( ". Problem in file: ", + x)) + } else { res2 <- as.character( xml2::xml_find_all( httr::content(res2, @@ -470,89 +433,85 @@ get_targetp.character <- function(data, strsplit(res2, "\n") ) - Sys.sleep(1) - if (any(grepl("mTP", res2_split))){ - break + } + + Sys.sleep(1) + + if (any(grepl("mTP", res2_split))){ + + break + } + + time2 <- Sys.time() + + max.time <- as.difftime(pmax(50, splitter), + units = "secs") + + + if ((time2 - time1) > max.time) { + res2_split <- NULL + if(progress) message( + "file", + x, + "took longer then expected") + break + } + } + if (is.null(res2_split)) { + tms <- 0 + while(tms < attempts && is.null(res2_split)){ + if(progress) message( + "reattempting file", + x) + file_up <- httr::upload_file(x) + res <- httr::POST( + url = url, + encode = "multipart", + body = list( + `configfile` = cfg, + `SEQSUB` = file_up, + `orgtype` = org_type, + `cleavsite` = "on", + `spec` = spec, + `tcut` = tcut, + `pcut` = pcut, + `scut` = scut, + `ocut` = ocut + )) + + if(!grepl("jobid=", res$url)){ + 
stop("something went wrong on server side") } - time2 <- Sys.time() + res <- sub("https://services.healthtech.dtu.dk/cgi-bin/webface2.cgi?jobid=", + "", + res$url, + fixed = TRUE) - max.time <- as.difftime(pmax(50, splitter), - units = "secs") + res <- sub("&wait=20", + "", + res, + fixed = TRUE) - if ((time2 - time1) > max.time) { - res2_split <- NULL - if(progress) message( - "file", - x[i], - "took longer then expected") - break - } - } - - if (is.null(res2_split)) { - tms <- 0 - while(tms < attempts && is.null(res2_split)){ - if(progress) message( - "reattempting file", - x[i]) - file_up <- httr::upload_file(x[i]) - res <- httr::POST( - url = "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?", - encode = "multipart", - body = list( - `configfile` = "/usr/opt/www/pub/CBS/services/TargetP-1.1/TargetP.cf", - `SEQSUB` = file_up, - `orgtype` = org_type, - `cleavsite` = "on", - `spec` = spec, - `tcut` = tcut, - `pcut` = pcut, - `scut` = scut, - `ocut` = ocut + jobid <- res + + time1 <- Sys.time() + + repeat { + res2 <- httr::GET( + url = url, + query = list( + jobid = jobid, + wait = "20" )) - if(!grepl("jobid=", res$url)){ - stop("something went wrong on server side") - } - res <- sub("http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?jobid=", - "", - res$url, - fixed = TRUE) - res <- sub("&wait=20", - "", - res, - fixed = TRUE) - jobidi <- res + code <- res2$status_code - time1 <- Sys.time() - repeat { - res2 <- httr::GET( - url <- "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi?", - query = list( - jobid = jobidi, - wait = "20" - )) - bad <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - "//head") - ) - if (grepl("Illegal", bad)){ - prt <- xml2::xml_text( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - "//li") - ) - stop(paste0(prt, - ". Problem in file: ", - "temp_", - i, - ".fa"), - call. = FALSE) - } + if(code != 200){ + res2_split <- NULL + warning(paste0( ". 
Problem in file: ", + x)) + } else { res2 <- as.character( xml2::xml_find_all( httr::content(res2, @@ -563,54 +522,57 @@ get_targetp.character <- function(data, strsplit(res2, "\n") ) - Sys.sleep(1) - if (any(grepl("mTP", res2_split))){ - break - } - - time2 <- Sys.time() - - max.time <- as.difftime(pmax(100, splitter * 1.5), - units = "secs") - - if ((time2 - time1) > max.time) { - res2_split <- NULL - break - } } - tms <- tms + 1 + Sys.sleep(1) + if (any(grepl("mTP", res2_split))){ + break + } + + time2 <- Sys.time() + + max.time <- as.difftime(pmax(100, splitter * 1.5), + units = "secs") + + if ((time2 - time1) > max.time) { + res2_split <- NULL + break + } } + + tms <- tms + 1 } - if (is.null(res2_split)){ - output <- do.call(rbind, - output) - output$is.targetp <- output$Loc == "S" - if(progress){ - utils::setTxtProgressBar(pb, - for_pb) - close(pb) - } - warning( - "maximum attempts reached at", - x[i], - "returning finished queries", - call. = FALSE) - return(output) + } + + if (is.null(res2_split)){ + output <- do.call(rbind, + output) + output$is.targetp <- output$Loc == "S" + if(progress){ + utils::setTxtProgressBar(pb, + for_pb) + close(pb) } - unlink(x[i]) - res2_split <- res2_split[(which(grepl("mTP", - res2_split))[1]+2):(which(grepl("cutoff", - res2_split))[1] - 2)] - res2_split <- strsplit(res2_split, - " +") - - res2_split <- do.call(rbind, - res2_split) - - res2_split <- as.data.frame(res2_split, - stringsAsFactors = FALSE) - - if(org_type == "plant"){ + warning( + "maximum attempts reached at", + x, + "returning finished queries", + call. 
= FALSE) + return(output) + } + unlink(x) + res2_split <- res2_split[(which(grepl("mTP", + res2_split))[1]+2):(which(grepl("cutoff", + res2_split))[1] - 2)] + res2_split <- strsplit(res2_split, + " +") + + res2_split <- do.call(rbind, + res2_split) + + res2_split <- as.data.frame(res2_split, + stringsAsFactors = FALSE) + + if(org_type == "plant"){ colnames(res2_split) <- c("Name", "Len", "cTP", @@ -620,8 +582,8 @@ get_targetp.character <- function(data, "Loc", "RC", "TPlen") - } - + } + if(org_type == "non_plant"){ colnames(res2_split) <- c("Name", "Len", @@ -632,13 +594,11 @@ get_targetp.character <- function(data, "RC", "TPlen") } - - if(progress){ - utils::setTxtProgressBar(pb, - floor(i/2) + 5 + (10 * (k - 1))) - } - output[[((k*10)-10)+i]] <- res2_split + if(progress){ + utils::setTxtProgressBar(pb, + k) } + output[[k]] <- res2_split } if(progress){ diff --git a/R/plot_prot.R b/R/plot_prot.R index 29e928a..645b415 100644 --- a/R/plot_prot.R +++ b/R/plot_prot.R @@ -26,15 +26,14 @@ #' #' @return A ggplot2 plot object #' -#' @seealso \code{\link[ragp]{get_signalp5}} \code{\link[ragp]{get_signalp}} \code{\link[ragp]{get_phobius}} \code{\link[ragp]{get_tmhmm}} \code{\link[ragp]{get_hmm}} \code{\link[ragp]{get_espritz}} \code{\link[ragp]{predict_hyp}} \code{\link[ragp]{scan_ag}} +#' @seealso \code{\link[ragp]{get_signalp5}} \code{\link[ragp]{get_signalp}} \code{\link[ragp]{get_phobius}} \code{\link[ragp]{get_tmhmm}} \code{\link[ragp]{get_hmm}} \code{\link[ragp]{get_cdd}} \code{\link[ragp]{get_espritz}} \code{\link[ragp]{predict_hyp}} \code{\link[ragp]{scan_ag}} #' #' @examples #' library(ragp) #' library(ggplot2) #' ind <- c(23, 5, 80, 81, 345) #' pred <- plot_prot(sequence = at_nsp$sequence[ind], -#' id = at_nsp$Transcript.id[ind], -#' bitscore = 30) #passed to get_hmm +#' id = at_nsp$Transcript.id[ind]) #' pred + #' theme(legend.position = "bottom", #' legend.direction = "vertical") @@ -44,9 +43,9 @@ #' id = Transcript.id, #' sequence = sequence) #' -#' hmm <- 
get_hmm(data = at_nsp[ind,], +#' hmm <- get_hmm(data = at_nsp[ind,], #default is to use get_cdd() #' id = Transcript.id, -#' sequence = sequence) +#' sequence = sequence) #' #' gpi <- get_netGPI(data = at_nsp[ind,], #' id = Transcript.id, @@ -66,8 +65,7 @@ #' nsp = nsp, #' gpi = gpi, #' domain = hmm, -#' disorder = disorder, -#' bitscore = 30) +#' disorder = disorder) #' #' #' pred2 + @@ -530,11 +528,11 @@ plot_prot <- function(sequence, sep = "")) if (dom_sort == "abc"){ - seq_hmm <- seq_hmm[with(seq_hmm, order(id_num, name)),] + seq_hmm <- seq_hmm[with(seq_hmm, order(id_num, domain)),] } if (dom_sort == "cba"){ seq_hmm <- seq_hmm[with(seq_hmm, order(id_num, - name, + domain, decreasing = c(FALSE, TRUE), method = "radix")),] } diff --git a/R/plot_signalp.R b/R/plot_signalp.R deleted file mode 100644 index 1103f37..0000000 --- a/R/plot_signalp.R +++ /dev/null @@ -1,340 +0,0 @@ -#' Plotting SignalP prediction. -#' -#' Plots the SignalP prediction for one protein sequence using base graphics. SignalP 4.1 server predicts the presence and location of signal peptide cleavage sites in amino acid sequences from different organisms: Gram-positive prokaryotes, Gram-negative prokaryotes, and eukaryotes. The method incorporates a prediction of cleavage sites and a signal peptide/non-signal peptide prediction based on a combination of several artificial neural networks. -#' -#' @param sequence String representing a protein amino acid sequence. -#' @param id String representing a protein identifier. -#' @param org_type One of c("euk", "gram-", "gram+"), defaults to "euk". Which model should be used for prediction. -#' @param Dcut_type One of c("default", "sensitive", "user"), defaults to "default". The default cutoff values for SignalP 4 are chosen to optimize the performance measured as Matthews Correlation Coefficient (MCC). This results in a lower sensitivity (true positive rate) than SignalP 3.0 had. 
Setting this argument to "sensitive" will yield the same sensitivity as SignalP 3.0. This will make the false positive rate slightly higher, but still better than that of SignalP 3.0. -#' @param Dcut_noTM A numeric value, with range 0 - 1, defaults to 0.45. For experimenting with cutoff values. -#' @param Dcut_TM A numeric value, with range 0 - 1, defaults to 0.5. For experimenting with cutoff values. -#' @param method One of c("best", "notm"), defaults to "best". Signalp 4.1 contains two types of neural networks. SignalP-TM has been trained with sequences containing transmembrane segments in the data set, while SignalP-noTM has been trained without those sequences. Per default, SignalP 4.1 uses SignalP-TM as a preprocessor to determine whether to use SignalP-TM or SignalP-noTM in the final prediction (if 4 or more positions are predicted to be in a transmembrane state, SignalP-TM is used, otherwise SignalP-noTM). An exception is Gram-positive bacteria, where SignalP-TM is used always. If you are confident that there are no transmembrane segments in your data, you can get a slightly better performance by choosing "Input sequences do not include TM regions", which will tell SignalP 4.1 to use SignalP-noTM always. -#' @param c.score.col Plotting color of the C-score line. At default set to: '#ff0000'. -#' @param s.score.col Plotting color of the S-score line. At default set to: '#728fcc'. -#' @param y.score.col Plotting color of the Y-score line. At default set to: '#728fcc'. -#' @param t.col Plotting color of the threshold line. At default set to: '#551a8b'. -#' @param main Title of the plot. -#' @param sleep A numeric indicating the pause in seconds between POST and GET server calls, at default set to 5s. Decreasing is not recommended. 
-#' @return A list with two elements: -#' \describe{ -#' \item{prediction}{Data frame with the prediction results.} -#' \item{plot}{Data frame with values used for plotting.} -#' } -#' -#' @source \url{http://www.cbs.dtu.dk/services/SignalP-4.1/} -#' @references Petersen TN. Brunak S. Heijne G. Nielsen H. (2011) SignalP 4.0: discriminating signal peptides from transmembrane regions. Nature Methods 8: 785-786 -#' -#' @seealso \code{\link[ragp]{get_signalp}} -#' -#' @examples -#' library(ragp) -#' pred <- plot_signalp(sequence = at_nsp$sequence[5], -#' id = at_nsp$Transcript.id[5]) -#' -#' @export - -plot_signalp <- function(sequence, - id, - org_type = c("euk", "gram-", "gram+"), - Dcut_type = c("default", "sensitive", "user"), - Dcut_noTM = 0.45, - Dcut_TM = 0.5, - method = c("best", "notm"), - c.score.col = "#ff0000", - s.score.col = "#59a454", - y.score.col = "#728fcc", - t.col = "#551a8b", - main = NULL, - sleep = 5L){ - if (missing(sleep)) { - sleep <- 5 - } - if (length(sleep) > 1){ - sleep <- 5 - warning("sleep should be of length 1, setting to default: sleep = 5") - } - if (!is.numeric(sleep)){ - sleep <- as.numeric(sleep) - warning("sleep is not numeric, converting using 'as.numeric'") - } - if (is.na(sleep)){ - sleep <- 5 - warning("sleep was set to NA, setting to default: sleep = 3") - } - if (sleep < 2){ - warning("setting sleep to less than 2s can cause problems when fetching results from the server") - } - if (missing(org_type)) { - org_type <- "euk" - } - if (!org_type %in% c("euk", "gram-", "gram+")) { - stop("org_type should be one of: 'euk', 'gram-', 'gram+'") - } - if (length(org_type) > 1){ - stop("org_type should be one of: 'euk', 'gram-', 'gram+'") - } - if (missing(Dcut_type)) { - Dcut_type <- "default" - } - if (!Dcut_type %in% c("default", "sensitive", "user")) { - stop("Dcut_type should be one of: 'default', 'sensitive', 'user'") - } - if (length(Dcut_type) > 1){ - stop("Dcut_type should be one of: 'default', 'sensitive', 'user'") - } - if 
(missing(Dcut_noTM)) { - Dcut_noTM <- "0.45" - } else { - Dcut_noTM <- as.character(Dcut_noTM)[1] - } - if (!is.numeric(as.numeric(Dcut_noTM))){ - Dcut_noTM <- "0.45" - warning("Dcut_noTM could not be converted to numeric, setting to default: Dcut_noTM = '0.45'") - } - if (is.na(Dcut_noTM)) { - Dcut_noTM <- "0.45" - warning("Dcut_noTM was set to NA, setting to default: Dcut_noTM = '0.45'") - } - if (as.numeric(Dcut_noTM[1]) > 1) { - Dcut_noTM <- "0.45" - warning("Dcut_noTM must take values in the range 0 - 1, - it was set to the default: Dcut_noTM = '0.45'") - } - if (as.numeric(Dcut_noTM[1]) < 0) { - Dcut_noTM <- "0.45" - warning("Dcut_noTM must take values in the range 0 - 1, - it was set to the default: Dcut_noTM = '0.45'") - } - if (missing(Dcut_TM)) { - Dcut_TM <- "0.5" - } else { - Dcut_TM <- as.character(Dcut_TM)[1] - } - if (!is.numeric(as.numeric(Dcut_TM))){ - Dcut_TM <- "0.5" - warning("Dcut_TM could not be converted to numeric, setting to default: Dcut_TM = '0.5'") - } - if (is.na(Dcut_TM)) { - Dcut_TM <- "0.5" - warning("Dcut_noTM was set to NA, setting to default: Dcut_TM = '0.5'") - } - if (as.numeric(Dcut_TM[1]) > 1) { - Dcut_TM <- "0.5" - warning("Dcut_TM must take values in the range 0 - 1, - it was set to the default: Dcut_TM = '0.5'") - } - if (as.numeric(Dcut_TM[1]) < 0) { - Dcut_TM <- "0.5" - warning("Dcut_TM must take values in the range 0 - 1, - it was set to the default: Dcut_TM = '0.5'") - } - if (missing(method)) { - method <- "best" - } - if (!method %in% c("best", "notm")){ - stop("method should be one of: 'best', 'notm'") - } - if (length(method) > 1){ - stop("method should be one of: 'best', 'notm'") - } - if (missing(sequence)){ - stop("protein sequence must be provided to obtain predictions") - } - if (missing(id)){ - stop("protein id must be provided to obtain predictions") - } - if (length(sequence) != 1){ - stop("one string representing a protein sequence should be provided") - } - if (length(id) != 1){ - stop("one string 
representing a protein id should be provided") - } - areColors <- function(x) { - sapply(x, function(X) { - tryCatch(is.matrix(grDevices::col2rgb(X)), - error = function(e) FALSE) - }) - } - if (length(c.score.col) > 1){ - c.score.col <- '#ff0000' - warning("One color should be provided for c.score.col. Using default: '#ff0000'") - } - if (!areColors(c.score.col)){ - c.score.col <- '#ff0000' - warning("c.score.col provided is not a valid color, default will be used: '#ff0000'") - } - if (length(s.score.col) > 1){ - s.score.col <- '#59a454' - warning("One color should be provided for s.score.col. Using default: '#59a454'") - } - if (!areColors(s.score.col)){ - s.score.col <- '#59a454' - warning("s.score.col provided is not a valid color, default will be used: '#59a454'") - } - if (length(y.score.col) > 1){ - y.score.col <- '#728fcc' - warning("One color should be provided for y.score.col. Using default: '#728fcc'") - } - if (!areColors(y.score.col)){ - y.score.col <- '#728fcc' - warning("y.score.col provided is not a valid color, default will be used: '#728fcc'") - } - if (length(t.col) > 1){ - t.col <- '#551a8b' - warning("One color should be provided for t.col. 
Using default: '#551a8b'") - } - if (!areColors(t.col)){ - t.col <- '#551a8b' - warning("t.col provided is not a valid color, default will be used: '#551a8b'") - } - sequence <- sub("\\*$", "", sequence) - sequence <- toupper(as.character(sequence)) - id <- as.character(id) - res <- httr::POST( - url = "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi", - encode = "form", - body = list( - `configfile` = "/usr/opt/www/pub/CBS/services/SignalP-4.1/SignalP.cf", - `SEQPASTE` = sequence, - `orgtype` = org_type, - `Dcut-type` = Dcut_type, - `Dcut-noTM` = Dcut_noTM, - `Dcut-TM` = Dcut_TM, - `graphmode` = NULL, - `format` = "long", - `minlen` = "", - `method` = method, - `trunc` = "" - )) - res <- httr::content(res, - as = "parsed") - res <- xml2::xml_find_all(res, - ".//input[@name='jobid']") - jobid <- xml2::xml_attr(res, - "value") - Sys.sleep(sleep) - repeat { - res2 <- httr::GET( - url = "http://www.cbs.dtu.dk/cgi-bin/webface2.fcgi", - query = list( - jobid = jobid, - wait = "20" - )) - - res2 <- as.character( - xml2::xml_find_all( - httr::content(res2, - as = "parsed"), - ".//pre") - ) - - res2 <- unlist(strsplit(res2, - "\n")) - if (any(grepl("Length", res2))) { - break - } - } - - tit <- res2[grep("SignalP-4.1", - res2)[1]] - tit <- gsub("#", "", tit) - tit <- trimws(tit) - isnoTM <- grepl("noTM", - res2[grep("pos", - res2)[1]-1]) - if(isnoTM) { - subtit <- paste("Networks: SignalP-noTM") - } else { - subtit <- paste("Networks: SignalP-TM") - } - res2_tab <- res2[(grep("pos", - res2)[1]+1):(grep("Measure", - res2)[1]-1)] - res2_tab <- utils::read.table(text = res2_tab, - stringsAsFactors = FALSE) - colnames(res2_tab) <- c("pos", - "aa", - "C", - "S", - "Y") - res2_out <- res2[(grep("Measure", - res2)[1]+1):(grep("Measure", - res2)[1]+4)] - - res2_out <- utils::read.table(text = res2_out, - stringsAsFactors = FALSE) - last_line <- unlist(strsplit(res2[(grep("Measure", - res2)[1]+5)], " +")) - res2_out <- rbind(res2_out[,2:4], - last_line[2:4]) - - colnames(res2_out) <- 
c("Measure", - "Position", - "value") - res2_out$Measure <- c("max.C", - "max.Y", - "max.S", - "mean.S", - "D") - res2_out$value <- as.numeric(as.character(res2_out$value)) - res2_out$Cutoff <- c(rep("", 4), - rev(last_line)[2]) - out <- list(prediction = res2_out, - plot = res2_tab) - graphics::plot(res2_tab$S, - ylim = c(-0.1, 1), - type = "l", - col = s.score.col, - xlab = "Position", - ylab = "Score", - yaxt = "n") - - if (missing(main)){ - maint <- paste(tit, - "\n", - subtit, - "\n", - "id: ", - id[1], - sep = "") - } else { - maint <- main - } - graphics::title(main = maint, - adj = 0, - cex.main = 1) - graphics::axis(2, yaxp = c(0, 1, 5), las = 2) - if(isnoTM) { - graphics::abline(h = Dcut_noTM, - col = t.col, - lty = 2) - } else { - graphics::abline(h = Dcut_TM, - col = t.col, - lty = 2) - } - graphics::segments(x0 = res2_tab$pos, - y0 = res2_tab$C, - y1 = 0, - col = c.score.col) - graphics::lines(res2_tab$Y, - col = y.score.col) - graphics::legend("topright", - col = c(c.score.col, - s.score.col, - y.score.col), - legend = c("C-score", - "S-score", - "Y-score"), - lty = 1) - max_pos <- max(as.numeric(res2_tab$pos)) - graphics::text(seq(1, - max_pos, - by = 1), - -0.02 , - labels = res2_tab$aa, - pos = 1, - xpd = TRUE, - cex = 0.7) - invisible(out) -} diff --git a/_pkgdown.yml b/_pkgdown.yml index a04f323..d7d05ea 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -3,10 +3,10 @@ reference: desc: > Functions for obtaining N-sp predictions based on protein sequence. contents: + - get_signalp5 - get_signalp - get_targetp - get_phobius - - plot_signalp - title: "Query GPI predictions" desc: > Functions for obtaining GPI predictions based on protein sequence. @@ -16,11 +16,14 @@ reference: - get_netGPI - title: "Annotate protein sequences" desc: > - Functions for domain and disorder prediction based on protein sequence. + Functions for domain, transmembrane region and disorder prediction based on protein sequence. 
contents: - get_hmm + - get_cdd - get_espritz - pfam2go + - get_phobius + - get_tmhmm - title: "Proline hydroxylation prediction" desc: ~ contents: diff --git a/docs/404.html b/docs/404.html index 6933f61..ba7cdf2 100644 --- a/docs/404.html +++ b/docs/404.html @@ -71,7 +71,7 @@ ragp - 0.3.2.0002 + 0.3.5.9000 @@ -79,7 +79,7 @@