This repository has been archived by the owner on Nov 10, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 198
/
Copy pathsearch_tweets.R
208 lines (196 loc) · 7.5 KB
/
search_tweets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#' Get tweets data on statuses identified via search query.
#'
#' Returns Twitter statuses matching a user provided search
#' query. `r lifecycle::badge("deprecated")`
#'
#' @param q Query to be searched, used to filter and select tweets to
#' return from Twitter's REST API. Must be a character string not to
#' exceed maximum of 500 characters. Spaces behave like boolean
#' "AND" operator. To search for tweets containing at least one of
#' multiple possible terms, separate each search term with spaces
#' and "OR" (in caps). For example, the search `q =
#' "data science"` looks for tweets containing both "data" and
#' "science" located anywhere in the tweets and in any order.
#' When "OR" is entered between search terms, e.g., `q =
#' "data OR science"`, Twitter's REST API should return any tweet
#' that contains either "data" or "science." It is also possible to
#' search for exact phrases using double quotes. To do this, either
#' wrap the double-quoted phrase in single quotes,
#' e.g., `q = '"data science"'`, or escape each internal double
#' quote with a single backslash, e.g., `q =
#' "\"data science\""`.
#'
#' Some other useful query tips:
#'
#' \itemize{
#' \item Exclude retweets via `"-filter:retweets"`
#' \item Exclude quotes via `"-filter:quote"`
#' \item Exclude replies via `"-filter:replies"`
#' \item Filter (return only) verified via `"filter:verified"`
#' \item Exclude verified via `"-filter:verified"`
#' \item Get everything (firehose for free) via `"-filter:verified OR filter:verified"`
#' \item Filter (return only) tweets with links to news articles via `"filter:news"`
#' \item Filter (return only) tweets with media `"filter:media"`
#' }
#'
#' @inheritParams TWIT_paginate_max_id
#' @inheritParams stream
#' @param type Character string specifying which type of search
#' results to return from Twitter's REST API. The default is
#' `type = "mixed"` (the first of the allowed values); other valid
#' types are `type = "recent"` and `type = "popular"`.
#' @param geocode Geographical limiter of the template
#' "latitude,longitude,radius" e.g., `geocode =
#' "37.78,-122.40,1mi"`.
#' @param include_rts Logical, indicating whether to include retweets
#' in search results. Retweets are classified as any tweet generated
#' by Twitter's built-in "retweet" (recycle arrows) function. These
#' are distinct from quotes (retweets with additional text provided
#' from sender) or manual retweets (old school method of manually
#' entering "RT" into the text of one's tweets).
#' @param ... Further arguments passed as query parameters in request
#' sent to Twitter's REST API. To return only English language
#' tweets, for example, use `lang = "en"`. For more options see
#' Twitter's API documentation.
#' @details Twitter API documentation recommends limiting searches to
#' 10 keywords and operators. Complex queries may also produce API
#' errors preventing recovery of information related to the query.
#' It should also be noted Twitter's search API does not consist of
#' an index of all Tweets. At the time of searching, the search API
#' index includes only between 6 and 9 days of Tweets.
#' @return List object with tweets and users each returned as a
#' data frame.
#' @family tweets
#' @seealso [tweet_search_recent()], [tweet_search_all()], [`rtweet-deprecated`]
#' @export
#' @references <https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/api-reference/get-search-tweets>
search_tweets <- function(q, n = 100,
                          type = c("mixed", "recent", "popular"),
                          include_rts = TRUE,
                          geocode = NULL,
                          since_id = NULL,
                          max_id = NULL,
                          parse = TRUE,
                          token = NULL,
                          retryonratelimit = NULL,
                          verbose = TRUE,
                          ...) {
  stopifnot(is_n(n))

  # Assemble the query parameters; anything supplied through `...` is
  # forwarded to search_params() and ends up in the parameter list.
  query <- search_params(
    q,
    type = type,
    include_rts = include_rts,
    geocode = geocode,
    ...
  )

  # Fetch the result pages. `get_id` tells the paginator where the status
  # ids live inside each page.
  pages <- TWIT_paginate_max_id(
    token, "/1.1/search/tweets", query,
    get_id = function(x) x$statuses$id_str,
    page_size = 100,
    n = n,
    since_id = since_id,
    max_id = max_id,
    retryonratelimit = retryonratelimit,
    verbose = verbose
  )

  if (parse) {
    # Keep only the "statuses" element of every page, then convert and
    # reformat the creation timestamps.
    statuses <- lapply(pages, function(page) page[["statuses"]])
    parsed <- tweets_with_users(statuses)
    parsed$created_at <- format_date(parsed$created_at)
    parsed
  } else {
    # Unparsed mode: return the pages exactly as the paginator produced them.
    pages
  }
}
# Build the named list of query parameters used by search_tweets().
#
# Arguments:
#   q           Single atomic query value. When omitted while `geocode` is
#               supplied, it defaults to the empty string.
#   type        One of "mixed", "recent", "popular"; stored as `result_type`.
#   include_rts When FALSE, " -filter:retweets" is appended to `q`.
#   geocode     Either NULL, a value passed through unchanged, or an object
#               of class "coords" that is converted to a
#               "<point>,<radius>mi" string.
#   max_id      NULL or a single atomic value.
#   ...         Extra entries appended to the returned list.
#
# Returns a list with elements q, result_type, max_id, tweet_mode,
# include_ext_alt_text, geocode, followed by the contents of `...`.
# Stops if `q` or `max_id` fail validation or `q` is longer than 600
# characters.
search_params <- function(q,
                          type = c("mixed", "recent", "popular"),
                          include_rts = TRUE,
                          geocode = NULL,
                          max_id = NULL,
                          ...) {
  if (missing(q) && !is.null(geocode)) {
    q <- ""
  }
  # `max_id` is optional, so NULL is tested for explicitly: is.atomic(NULL)
  # is TRUE before R 4.4.0 but FALSE from 4.4.0 on, which would otherwise
  # reject the default value.
  stopifnot(
    is.atomic(q) && !is.null(q) && length(q) == 1L,
    is.null(max_id) || (is.atomic(max_id) && length(max_id) <= 1L)
  )
  type <- arg_match(type)

  # The character count here may not always agree with Twitter's own count,
  # so only reject beyond 600 characters and let Twitter reject the rest.
  if (nchar(q) > 600) {
    stop("q cannot exceed 600 characters.", call. = FALSE)
  }
  if (!include_rts) {
    q <- paste0(q, " -filter:retweets")
  }

  # inherits() is FALSE for NULL, so no separate NULL check is needed.
  if (inherits(geocode, "coords")) {
    # Derive a search radius from the extent of `geocode$box`.
    # NOTE(review): 69 looks like miles per degree and the 1.8 divisors like
    # an empirical shrink factor -- confirm against the "coords" constructor.
    mls1 <- abs(geocode$box[2] - geocode$box[4]) * 69
    mls2 <- abs(geocode$box[1] - geocode$box[3]) *
      (69 - abs(.093 * geocode$point[1])^2)
    mls <- (mls1 / 1.8 + mls2 / 1.8) / 1.8
    mls <- round(mls, 3)
    geocode <- paste0(paste(geocode$point, collapse = ","), ",", mls, "mi")
  }

  list(
    q = q,
    result_type = type,
    max_id = max_id,
    tweet_mode = "extended",
    include_ext_alt_text = "true",
    geocode = geocode,
    ...
  )
}
#' Search tweets (vectorized)
#'
#' `search_tweets2()` passes all arguments to `search_tweets()`. Returns
#' data from one or more search queries.
#' `r lifecycle::badge("deprecated")`
#'
#' @return A tbl data frame with additional "query" column.
#' @rdname search_tweets
#' @seealso [tweet_search_recent()], [`rtweet-deprecated`]
#' @export
search_tweets2 <- function(...) {
  # Resolve the supplied arguments against the formals of search_tweets();
  # the resolved list also carries the defaults of anything not supplied.
  call_args <- match_fun(list(...), "search_tweets")

  # The queries are iterated over, so pull them out of the shared arguments.
  queries <- call_args[["q"]]
  call_args[["q"]] <- NULL
  do_parse <- call_args[["parse"]]

  # Run one search per query string.
  results <- Map("search_tweets", queries, MoreArgs = call_args)

  # Unparsed results are returned as-is, one element per query.
  if (!do_parse) {
    return(results)
  }

  # Drop queries whose result is empty; with nothing left there is nothing
  # to combine.
  has_data <- lengths(results) > 0L
  if (!any(has_data, na.rm = TRUE)) {
    return(data.frame())
  }
  results <- results[has_data]
  queries <- queries[has_data]

  # Tag every result with the query that produced it, then stack them.
  tagged <- Map("add_var", results, query = queries)
  do_call_rbind(tagged)
}
# Add a variable to `x` (a list or data frame) and return the modified `x`.
#
# The value comes from `...`. When the argument is named, that name is used
# for the new element; otherwise the name is the deparsed expression that
# was passed in.
add_var <- function(x, ...) {
  values <- list(...)
  varname <- names(values)
  if (is.null(varname)) {
    # No name supplied: fall back to the text of the argument expression.
    varname <- deparse(substitute(...))
  }
  x[[varname]] <- unlist(values, use.names = FALSE)
  x
}
# Resolve a list of call arguments against the formals of `fun`.
#
# Arguments:
#   dots  List of arguments, typically `list(...)`; may be partially named,
#         fully named, or entirely unnamed.
#   fun   Function (or name of a function) whose formals are matched.
#
# Unnamed arguments take the name of the formal at the same position in the
# list. The result is a list made of, in order:
#   1. the supplied arguments whose names are formals of `fun`,
#   2. the defaults of every remaining formal (except `...`),
#   3. the supplied arguments that match no formal.
match_fun <- function(dots, fun) {
  fmls <- formals(fun)
  fml_names <- names(fmls)

  # list(...) has no names attribute at all when every argument is
  # positional; treat that the same as all-empty names so those arguments
  # are still matched by position.
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }

  # Name positional arguments after the formal at the same index. Positions
  # beyond the last formal yield NA and stay unnamed.
  positional <- arg_names == ""
  arg_names[positional] <- fml_names[which(positional)]
  arg_names[is.na(arg_names)] <- ""
  names(dots) <- arg_names

  # Split into arguments `fun` knows about and extras.
  known <- names(dots) %in% fml_names
  extras <- dots[!known]
  dots <- dots[known]

  # Defaults for every formal that was not supplied.
  defaults <- fmls[!fml_names %in% names(dots) & fml_names != "..."]
  c(dots, defaults, extras)
}