From e3c326b159d8ff42102fcfc6943ad33bf099caca Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Tue, 3 Dec 2024 12:13:34 +0000 Subject: [PATCH] Handle more than 100 results from repo branch query --- DESCRIPTION | 11 +++-- NEWS.md | 10 ++++- R/github_branches.R | 67 +++++++++++++++++------------- README.md | 9 ++-- man/description_extract.Rd | 6 +++ man/github_branches.Rd | 10 ++--- tests/testthat/test-github_files.R | 14 +++---- 7 files changed, 78 insertions(+), 49 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 96d23f4..d770e48 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: echogithub Type: Package Title: echoverse module: Extract data and metadata from GitHub -Version: 0.99.2 +Version: 0.99.3 Authors@R: c(person(given = "Brian", family = "Schilder", @@ -17,7 +17,12 @@ Authors@R: family = "Raj", role = c("aut"), email = "towfique.raj@mssm.edu", - comment = c(ORCID = "0000-0002-9355-5704")) + comment = c(ORCID = "0000-0002-9355-5704")), + person(given = "Hiranyamaya", + family = "Dash", + role = c("ctb"), + email = "hdash.work@gmail.com", + comment = c(ORCID = "0009-0005-5514-505X")) ) Description: echoverse module: Extract data and metadata from GitHub. URL: https://github.com/RajLabMSSM/echogithub @@ -56,7 +61,7 @@ Suggests: Remotes: github::neurogenomics/cranlogs, github::neurogenomics/rworkflows -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 VignetteBuilder: knitr License: GPL-3 Config/testthat/edition: 3 diff --git a/NEWS.md b/NEWS.md index 17aa2fc..ef20d70 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# echogithub 0.99.3 + +## Bug fixes + +* `github_branches` + - Handle repos with more than 100 branches. + + # echogithub 0.99.2 ## New features @@ -68,4 +76,4 @@ * Switched to using `gh` instead of `httr` to avoid API limits imposed by GitHub. - Kept `httr` as alternative method. * `is_url`: - - Add `RCurl::url.exists` check. \ No newline at end of file + - Add `RCurl::url.exists` check. diff --git a/R/github_branches.R b/R/github_branches.R index 300172d..b82c64f 100644 --- a/R/github_branches.R +++ b/R/github_branches.R @@ -1,22 +1,22 @@ #' GitHub branches -#' +#' #' List all branches for a given GitHub repository. -#' @param owner Owner of the GitHub repository. +#' @param owner Owner of the GitHub repository. #' If \code{NULL}, will automatically try to infer the owner #' name from the \emph{DESCRIPTION file} #' (assuming you're working directory is a local R package repo). -#' @param repo GitHub repository name. +#' @param repo GitHub repository name. #' If \code{NULL}, will automatically try to infer the repo name #' name from the \emph{DESCRIPTION file} #' (assuming you're working directory is a local R package repo). -#' @param branch [Optional] If \code{branch} is supplied -#' (as a character vector of one or more branch names), +#' @param branch [Optional] If \code{branch} is supplied +#' (as a character vector of one or more branch names), #' will check to see if that branch exists. If it does, only that branch will #' be returned. If it doesn't, an error will be thrown. #' @param master_or_main If \code{branch} is supplied and #' is either \code{"master"} or \code{"main"}, -#' automatically interpret "master" and "main" as synonymous and return -#' whichever branch exists. +#' automatically interpret "master" and "main" as synonymous and return +#' whichever branch exists. #' @param as_datatable Return the results as a \link[data.table]{data.table} #' (\code{TRUE}), or as a character vector of branch names #' (default: \code{FALSE}). @@ -24,10 +24,10 @@ #' @inheritParams github_files #' @inheritParams description_extract #' @returns Character vector or \link[data.table]{data.table} of branches. -#' +#' #' @export #' @importFrom gh gh_token gh -#' @examples +#' @examples #' branches <- github_branches(owner="RajLabMSSM", repo="echolocatoR") github_branches <- function(owner = NULL, repo = NULL, @@ -37,15 +37,15 @@ github_branches <- function(owner = NULL, token = gh::gh_token(), desc_file = NULL, error = FALSE, - verbose = TRUE){ + verbose = TRUE){ name <- NULL; - + out <- infer_owner_repo(owner = owner, - repo = repo, - desc_file = desc_file, + repo = repo, + desc_file = desc_file, verbose = verbose) owner <- out$owner - repo <- out$repo + repo <- out$repo #### Search branches #### messager("Searching for all branches in:",paste(owner,repo,sep="/"), v=verbose) @@ -53,20 +53,29 @@ github_branches <- function(owner = NULL, "https://api.github.com/repos",owner,repo,"branches", sep="/" ) - gh_response <- gh::gh(endpoint = endpoint, - .token = token, - per_page = 100) - dt <- gh_to_dt(gh_response) - dt <- cbind(owner=owner, repo=repo, dt) - #### Filter branches #### - if(!is.null(branch)){ - #### Detect synonymous branches #### - if(isTRUE(master_or_main) && - any(c("master","main") %in% branch)){ - branch <- unique(c("master","main",branch)) - } - dt <- dt[name %in% branch,] + page <- 1 + repeat { + # Keep iterating pages until we find the branch or run out of pages + gh_response <- gh::gh(endpoint = endpoint, + .token = token, + per_page = 100, + page = page) + if(length(gh_response) == 0) break + dt <- gh_to_dt(gh_response) + dt <- cbind(owner=owner, repo=repo, dt) + #### Filter branches #### + if(!is.null(branch)){ + #### Detect synonymous branches #### + if(isTRUE(master_or_main) && + any(c("master","main") %in% branch)){ + branch <- unique(c("master","main",branch)) + } + dt <- dt[name %in% branch,] + } + if(nrow(dt)>0) break + page <- page + 1 } + #### Report #### if(nrow(dt)>0){ messager(paste0( @@ -78,9 +87,9 @@ github_branches <- function(owner = NULL, if(isTRUE(error)) { stop(stp) } else { - messager("WARNING:",stp,"Returning NULL.",v=verbose) + messager("WARNING:",stp,"Returning NULL.",v=verbose) return(NULL) - } + } } #### Return #### if(isTRUE(as_datatable)){ diff --git a/README.md b/README.md index 09dd7b1..376a8e2 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,20 @@
[![License: GPL-3](https://img.shields.io/badge/license-GPL--3-blue.svg)](https://cran.r-project.org/web/licenses/GPL-3) -[![](https://img.shields.io/badge/devel%20version-0.99.2-black.svg)](https://github.com/RajLabMSSM/echogithub) +[![](https://img.shields.io/badge/devel%20version-0.99.3-black.svg)](https://github.com/RajLabMSSM/echogithub) [![](https://img.shields.io/github/languages/code-size/RajLabMSSM/echogithub.svg)](https://github.com/RajLabMSSM/echogithub) [![](https://img.shields.io/github/last-commit/RajLabMSSM/echogithub.svg)](https://github.com/RajLabMSSM/echogithub/commits/master)
[![R build status](https://github.com/RajLabMSSM/echogithub/workflows/rworkflows/badge.svg)](https://github.com/RajLabMSSM/echogithub/actions) -[![](https://codecov.io/gh/RajLabMSSM/echogithub/branch/master/graph/badge.svg)](https://codecov.io/gh/RajLabMSSM/echogithub) +[![](https://codecov.io/gh/RajLabMSSM/echogithub/branch/master/graph/badge.svg)](https://app.codecov.io/gh/RajLabMSSM/echogithub)

-Authors: Brian Schilder, Jack Humphrey, Towfique Raj +Authors: Brian Schilder, Jack Humphrey, Towfique Raj, Hiranyamaya +Dash

-README updated: Mar-10-2023 +README updated: Dec-03-2024
## `echogithub`: Extract data and metadata from GitHub. diff --git a/man/description_extract.Rd b/man/description_extract.Rd index 6cd4513..632da4f 100644 --- a/man/description_extract.Rd +++ b/man/description_extract.Rd @@ -43,12 +43,18 @@ For example: Arguments passed on to \code{\link[rworkflows:get_description]{rworkflows::get_description}} \describe{ \item{\code{paths}}{Paths to \emph{DESCRIPTION} file(s) R package(s).} + \item{\code{db}}{A \link[data.table]{data.table} of R package metadata generated by +\link[BiocPkgTools]{biocPkgList}.} \item{\code{cache_dir}}{Directory where to cache downloaded files.} \item{\code{force_new}}{Ignore cached files and re-download them instead.} \item{\code{use_wd}}{Search the local working directory (and the one above it) for \emph{DESCRIPTION} files.} \item{\code{use_repos}}{Use R standard R package repositories like CRAN and Bioc to find \emph{DESCRIPTION} files.} + \item{\code{repo}}{\code{character(1)} The requested Bioconductor repository. The default +is to pull from the "BioCsoft" repository. Possible repositories include +"BioCsoft", "BioCexp", "BioCworkflows", "BioCann", and "CRAN". Note that +not all repos are available for all versions, particularly older versions.} }} } \value{ diff --git a/man/github_branches.Rd b/man/github_branches.Rd index 646e875..1c5ff36 100644 --- a/man/github_branches.Rd +++ b/man/github_branches.Rd @@ -17,24 +17,24 @@ github_branches( ) } \arguments{ -\item{owner}{Owner of the GitHub repository. +\item{owner}{Owner of the GitHub repository. If \code{NULL}, will automatically try to infer the owner name from the \emph{DESCRIPTION file} (assuming you're working directory is a local R package repo).} -\item{repo}{GitHub repository name. +\item{repo}{GitHub repository name. If \code{NULL}, will automatically try to infer the repo name name from the \emph{DESCRIPTION file} (assuming you're working directory is a local R package repo).} -\item{branch}{[Optional] If \code{branch} is supplied -(as a character vector of one or more branch names), +\item{branch}{[Optional] If \code{branch} is supplied +(as a character vector of one or more branch names), will check to see if that branch exists. If it does, only that branch will be returned. If it doesn't, an error will be thrown.} \item{master_or_main}{If \code{branch} is supplied and is either \code{"master"} or \code{"main"}, -automatically interpret "master" and "main" as synonymous and return +automatically interpret "master" and "main" as synonymous and return whichever branch exists.} \item{as_datatable}{Return the results as a \link[data.table]{data.table} diff --git a/tests/testthat/test-github_files.R b/tests/testthat/test-github_files.R index 98a8804..696e078 100644 --- a/tests/testthat/test-github_files.R +++ b/tests/testthat/test-github_files.R @@ -1,9 +1,9 @@ -test_that("github_files works", { +test_that("github_files works", { - # files <- github_files(owner = "RajLabMSSM", - # repo = "Fine_Mapping_Shiny", - # query = ".md$", - # download = TRUE) - # testthat::expect_true(methods::is(files, "data.table")) - # testthat::expect_true(nrow(files)>=1) + files <- github_files(owner = "RajLabMSSM", + repo = "Fine_Mapping_Shiny", + query = ".md$", + download = TRUE) + testthat::expect_true(methods::is(files, "data.table")) + testthat::expect_true(nrow(files)>=1) })