This repository was archived by the owner on Nov 10, 2024. It is now read-only.

Commit 9af028f

Merge branch 'master' into master

mkearney authored May 1, 2019
2 parents 0f6ca88 + c6381e6
Showing 108 changed files with 2,372 additions and 1,577 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -23,3 +23,4 @@
^codecov\.yml$
^CRAN-RELEASE$
^revdep$
+^R/build_search_query\.R$
15 changes: 10 additions & 5 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: rtweet
Type: Package
-Version: 0.6.8.9000
+Version: 0.6.9
Title: Collecting Twitter Data
Authors@R: c(
person("Michael W.", "Kearney", ,
@@ -22,7 +22,9 @@ Imports:
magrittr (>= 1.5.0),
tibble (>= 1.3.4),
utils,
-progress
+progress,
+Rcpp,
+httpuv
License: MIT + file LICENSE
URL: https://CRAN.R-project.org/package=rtweet
BugReports: https://github.com/mkearney/rtweet/issues
@@ -34,9 +36,12 @@ Suggests:
openssl,
readr,
rmarkdown,
-testthat,
+testthat (>= 2.1.0),
webshot,
-covr
+covr,
+igraph
VignetteBuilder: knitr
LazyData: yes
-RoxygenNote: 6.1.0.9000
+RoxygenNote: 6.1.1
+LinkingTo:
+Rcpp
4 changes: 4 additions & 0 deletions NAMESPACE
@@ -79,6 +79,8 @@ export(lookup_tweets)
export(lookup_users)
export(max_id)
export(my_friendships)
+export(network_data)
+export(network_graph)
export(next_cursor)
export(parse_stream)
export(plain_tweets)
@@ -115,6 +117,7 @@ export(unflatten)
export(users_data)
export(users_with_tweets)
export(write_as_csv)
+importFrom(Rcpp,sourceCpp)
importFrom(graphics,legend)
importFrom(httr,GET)
importFrom(httr,POST)
@@ -132,3 +135,4 @@ importFrom(jsonlite,stream_in)
importFrom(magrittr,"%>%")
importFrom(utils,read.csv)
importFrom(utils,write.csv)
+useDynLib(rtweet, .registration = TRUE)
21 changes: 12 additions & 9 deletions NEWS.md
@@ -1,4 +1,7 @@
-# rtweet 0.6.8.9000
+# rtweet 0.6.9
+- Better tweet-validating in streaming data: interrupted statuses/broken lines
+  are now returned
+- Added network-graph convenience functions `network_data()` and `network_graph()`

# rtweet 0.6.8
- Users can now create read-only tokens using the built-in rtweet client!
@@ -43,7 +46,7 @@
suggested users data.
- Various bug fixes and stability improvements.
- Significant upgrades to `save_as_csv()`, including addition of new
-  `prep_as_csv()` as convience function for flattening Twitter data frames.
+  `prep_as_csv()` as convenience function for flattening Twitter data frames.
- Tokens have been retooled. For at least the time being, users must
create a Twitter app in order to be authorized to interact with the
REST and stream APIs.
@@ -54,7 +57,7 @@
This means functions now return a more consistent and intuitive
data object where one row is always equal to one tweet.
- Overhauled `save_as_csv()` with improved flattening and ID-preserving
-  saving methods. THe function now saves a single [joined] data set as
+  saving methods. The function now saves a single [joined] data set as
well.
- Fixed major bugs in `get_favorites()` and in several `lists_*()`
functions.
@@ -70,7 +73,7 @@
- Added `stream_tweets2()` function for more robust streaming
method. Streams JSON files to directory and reconnects following
premature disruptions.
-- Various bug fixes nad numerous documentation improvements.
+- Various bug fixes and numerous documentation improvements.
# rtweet 0.5.0
- Added access to direct messages, mentions, list subscriptions, list
@@ -165,7 +168,7 @@
* More bug fixes and various improvements.
* The `ts_plot()` function is now more robust with more adaptive
characteristics for variations in the number of filters, the method
-  of distiguishing lines, the position of the legend, and the
+  of distinguishing lines, the position of the legend, and the
aesthetics of the themes.
* Added `ts_filter()` function which allows users to convert Twitter
data into a time series-like data frame. Users may also provide
@@ -187,7 +190,7 @@ assisting functions.
* Major improvements to ts_plot including SIX different
themes from which users may choose
* More parsing fixes and misc stability improvements
-* Minor renamig of variables along with returning more
+* Minor renaming of variables along with returning more
variables overall
# rtweet 0.3.9
@@ -318,7 +321,7 @@ printing entire data frames quickly becomes headache-inducing.

* Added new trends functions. Find what trending locations are
available with `trends_available()` and/or search for trends
-  worldwide or by geogaphical location using `get_trends()`.
+  worldwide or by geographical location using `get_trends()`.

* Stability improvements including integration with Travis CI and
code analysis via codecov. Token encryption method also means API
@@ -339,8 +342,8 @@ attribute.
tweets data attribute.

* To access users data from a tweets object or vice-versa, use
-  `users_data()` and `tweets_data()` functions on objects outputed
-  by major rtweet retrieval functions.
+  `users_data()` and `tweets_data()` functions on objects output by major
+  rtweet retrieval functions.

* Updated testthat tests

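As a quick illustration of the `users_data()`/`tweets_data()` accessors described above (a sketch, not part of this commit; assumes an authorized token):

## fetch tweets, then pull the users data stored alongside them
tw <- search_tweets("#rstats", n = 100)
users_data(tw)

## or go the other direction from a users object
usr <- search_users("rstats", n = 20)
tweets_data(usr)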
1 change: 1 addition & 0 deletions R/.gitignore
@@ -0,0 +1 @@
build_search_query.R
11 changes: 11 additions & 0 deletions R/RcppExports.R
@@ -0,0 +1,11 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

unroll_connections <- function(from, to) {
.Call(`_rtweet_unroll_connections`, from, to)
}

unroll_users <- function(x) {
.Call(`_rtweet_unroll_users`, x)
}

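These generated wrappers expose the package's new compiled C++ routines (registered via useDynLib() in the NAMESPACE above) to R. Judging from the call sites in R/graph-network.R below, unroll_connections() expands a vector of source users against a list-column of target IDs, one row per edge. A rough base-R equivalent of that idea, as a hypothetical sketch rather than the actual implementation:

## one row per (from, to) pair, flattening the list-column of targets
unroll_connections_sketch <- function(from, to) {
  n <- lengths(to)  # number of targets per source user
  data.frame(
    from = rep(from, n),
    to = unlist(to, use.names = FALSE),
    stringsAsFactors = FALSE
  )
}
unroll_connections_sketch(c("a", "b"), list(c("x", "y"), "z"))
#>   from to
#> 1    a  x
#> 2    a  y
#> 3    b  z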
5 changes: 5 additions & 0 deletions R/followers.R
@@ -51,6 +51,11 @@
#' reset. Users should monitor and test this before making
#' especially large calls as any systematic issues could create
#' sizable inefficiencies.
+#'
+#' At this time, results are ordered with the most recent following first;
+#' however, this ordering is subject to unannounced change and eventual
+#' consistency issues. While this remains true, it is possible to iteratively
+#' build follower lists for a user over time.
#' @seealso
#' \url{https://developer.twitter.com/en/docs/accounts-and-users/follow-search-get-users/api-reference/get-followers-ids}
#' @examples
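A minimal sketch of the iterative workflow this new note enables (assumes an authorized token; the screen name is a placeholder); the same pattern applies to get_friends() in the next file:

## page through followers 5,000 IDs at a time, carrying the cursor forward
f1 <- get_followers("rstatstweet", n = 5000)
f2 <- get_followers("rstatstweet", n = 5000, page = next_cursor(f1))

## because the most recent followers come first, a later run only needs to
## read until it reaches IDs captured in an earlier saved snapshot
old_ids <- readRDS("follower_ids.rds")  # hypothetical earlier snapshot
fresh <- get_followers("rstatstweet", n = 5000)
new_ids <- setdiff(fresh$user_id, old_ids)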
39 changes: 22 additions & 17 deletions R/friends.R
@@ -67,6 +67,11 @@
#' for a second time) until the next rate limit reset. Users should monitor
#' and test this before making especially large calls as any systematic issues
#' could create sizable inefficiencies.
+#'
+#' At this time, results are ordered with the most recent following first;
+#' however, this ordering is subject to unannounced change and eventual
+#' consistency issues. While this remains true, it is possible to iteratively
+#' build friends lists for a user over time.
#' @return A tibble data frame with two columns, "user" for name or ID of target
#' user and "user_id" for follower IDs.
#' @family ids
@@ -199,19 +204,19 @@ get_friends_ <- function(users,
)
## if !retryonratelimit then if necessary exhaust what can with token
f <- get_friend(url, token = token)
-if (has_name_(f, "errors")) {
-warning(f$errors[["message"]], call. = FALSE)
-return(list(data.frame()))
-} else if (parse) {
-nextcursor <- next_cursor(f)
-if (length(f[["ids"]]) == 0) {
-f <- tibble::as_tibble()
-} else {
-f <- tibble::as_tibble(
-list(user = users, user_id = f[["ids"]]))
-attr(f, "next_cursor") <- nextcursor
-}
+if (has_name_(f, "errors")) {
+warning(f$errors[["message"]], call. = FALSE)
+return(list(data.frame()))
+} else if (parse) {
+nextcursor <- f[["next_cursor"]]
+if (length(f[["ids"]]) == 0) {
+f <- tibble::as_tibble()
+} else {
+f <- tibble::as_tibble(
+list(user = users, user_id = f[["ids"]]))
+attr(f, "next_cursor") <- nextcursor
+}
}
}
f
}
@@ -280,9 +285,9 @@ my_friendships <- function(user,


lookup_friendships_ <- function(source,
-target,
-parse = TRUE,
-token = NULL) {
+target,
+parse = TRUE,
+token = NULL) {
stopifnot(is.atomic(source), is.atomic(target))
token <- check_token(token)
query <- "friendships/show"
@@ -346,7 +351,7 @@ parse_showfriendships <- function(x, source_user, target_user) {
}
if (has_name_(x, "source")) {
src <- unlist(x$source)
-src <- tibble::data_frame(
+src <- tibble::tibble(
relationship = "source",
user = target_user,
variable = names(src),
Expand All @@ -357,7 +362,7 @@ parse_showfriendships <- function(x, source_user, target_user) {
}
if (has_name_(x, "target")) {
trg <- unlist(x$target)
-trg <- tibble::data_frame(
+trg <- tibble::tibble(
relationship = "target",
user = source_user,
variable = names(trg),
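The data_frame() to tibble() swaps above track the tibble package's API: tibble::data_frame() is deprecated in favor of tibble::tibble(), which builds the same structure. For example (values are illustrative, not from the commit):

## one of the long-format relationship rows assembled here
tibble::tibble(
  relationship = "source",
  user = "jack",
  variable = "followed_by",
  value = TRUE
)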
155 changes: 155 additions & 0 deletions R/graph-network.R
@@ -0,0 +1,155 @@
id_sn_index <- function(x) {
id <- character()
sn <- character()
if ("mentions_user_id" %in% names(x)) {
id <- unroll_users(x$mentions_user_id)
sn <- unroll_users(x$mentions_screen_name)
x$mentions_user_id <- NULL
x$mentions_screen_name <- NULL
}
id_vars <- grep("user_id$", names(x), value = TRUE)
sn_vars <- grep("screen_name$", names(x), value = TRUE)
id <- c(id, unlist(x[id_vars], use.names = FALSE))
sn <- c(sn, unlist(x[sn_vars], use.names = FALSE))
kp <- !duplicated(id) & !is.na(id)
list(id = id[kp], sn = sn[kp])
}


id_sn_join <- function(x, ref) {
m <- match(x, ref$id)
ref$sn[m]
}

prep_from_to <- function(x, from, to) {
if (is.list(x[[to]])) {
unroll_connections(x[[from]], x[[to]])
} else {
x <- x[c(from, to)]
names(x) <- c("from", "to")
x <- x[!is.na(x[[2]]), ]
x
}
}

#' Network data
#'
#' Convert Twitter data into a network-friendly data frame
#'
#' @return A from/to edge data frame
#' @details \code{network_data} returns a data frame that can easily be converted to
#' various network classes. For direct conversion to a network object, see
#' \code{\link{network_graph}}.
#' @seealso network_graph
#' @examples
#'
#' \dontrun{
#' ## search for #rstats tweets
#' rstats <- search_tweets("#rstats", n = 200)
#'
#' ## create from-to data frame representing retweet/mention/reply connections
#' rstats_net <- network_data(rstats, "retweet,mention,reply")
#'
#' ## view edge data frame
#' rstats_net
#'
#' ## view user_id->screen_name index
#' attr(rstats_net, "idsn")
#'
#' ## if igraph is installed...
#' if (requireNamespace("igraph", quietly = TRUE)) {
#'
#' ## (1) convert directly to graph object representing semantic network
#' rstats_net <- network_graph(rstats)
#'
#' ## (2) plot graph via igraph.plotting
#' plot(rstats_net)
#' }
#' }
#' @rdname network_graph
#' @export
network_data <- function(.x, .e = c("mention,retweet,reply,quote")) {
if (isTRUE(.e)) {
.e <- "all"
}
stopifnot(is.character(.e))
.e <- sub("d$|s$", "", tolower(unlist(strsplit(.e, "[[:punct:] ]+"))))
if (length(.e) == 1 && grepl("^all$|^semantic$", .e, ignore.case = TRUE)) {
.e <- c("mention", "retweet", "reply", "quote")
}
.x <- lapply(.e, network_data_one, .x)
idsn <- lapply(.x, attr, "idsn")
idsn <- list(
id = unlist(lapply(idsn, `[[`, "id"), use.names = FALSE),
sn = unlist(lapply(idsn, `[[`, "sn"), use.names = FALSE)
)
idsn$sn <- idsn$sn[!duplicated(idsn$id)]
idsn$id <- idsn$id[!duplicated(idsn$id)]
.x <- do.call(rbind, .x)
attr(.x, "idsn") <- idsn
.x
}

network_data_one <- function(.e, .x) {
stopifnot(.e %in% c("mention", "retweet", "reply", "quote"))
vars <- c("user_id", "screen_name", switch(.e,
mention = c("mentions_user_id", "mentions_screen_name"),
retweet = c("retweet_user_id", "retweet_screen_name"),
reply = c("reply_to_user_id", "reply_to_screen_name"),
quote = c("quoted_user_id", "quoted_screen_name")))
.x <- .x[, vars]
idsn <- id_sn_index(.x)
v <- names(.x)
.x <- prep_from_to(.x, v[1], v[3])
if (nrow(.x) > 0) {
.x$type <- .e
}
attr(.x, "idsn") <- idsn
.x
}

#' Network graph
#'
#' Convert Twitter data into network graph object (igraph)
#'
#' @param .x Data frame returned by rtweet function
#' @param .e Type of edge/link–i.e., "mention", "retweet", "quote", "reply".
#' This must be a character vector of length one or more. This value will be
#' split on punctuation and space (so you can include multiple types in the
#' same string separated by a comma or space). The values "all" and
#' "semantic" are assumed to mean all edge types, which is equivalent to the
#' default value of \code{c("mention,retweet,reply,quote")}
#' @return An igraph object
#' @details \code{network_graph} requires previous installation of the igraph package.
#' To return a network-friendly data frame, see \code{\link{network_data}}
#' @seealso network_data
#' @export
network_graph <- function(.x, .e = c("mention,retweet,reply,quote")) {
if (!requireNamespace("igraph", quietly = TRUE)) {
stop(
"Please install the {igraph} package to use this function",
call. = FALSE
)
}
if (isTRUE(.e)) {
.e <- "all"
}
stopifnot(is.character(.e))
.e <- sub("d$|s$", "", tolower(unlist(strsplit(.e, "[[:punct:] ]+"))))
if (length(.e) == 1 && grepl("^all$|^semantic$", .e, ignore.case = TRUE)) {
.e <- c("mention", "retweet", "reply", "quote")
}
.x <- network_data(.x, .e)
idsn <- attr(.x, "idsn")
g <- igraph::make_empty_graph(n = 0, directed = TRUE)
g <- igraph::add_vertices(g, length(idsn$id),
attr = list(id = idsn$id, name = idsn$sn))
edges <- rbind(match(.x[[1]], idsn$id), match(.x[[2]], idsn$id))
igraph::add_edges(g, edges, attr = list(type = .x[[3]]))
}

# user_vars <- c("user_id", "screen_name", "name", "location", "description",
# "url", "protected", "followers_count", "friends_count", "listed_count",
# "statuses_count", "favourites_count", "account_created_at", "verified",
# "profile_url", "profile_expanded_url", "account_lang",
# "profile_banner_url", "profile_background_url", "profile_image_url")
