-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_data.R
50 lines (40 loc) · 1.37 KB
/
get_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(countrycode)
library(tabulizer)
library(tidyverse)
# Turn one raw extracted table into a tibble whose first row becomes the header.
#
# tbl: a single raw table (matrix or data frame of strings) whose first row
#      holds the column labels and whose first column holds the row labels.
#      Presumably an element of tabulizer::extract_tables() output -- confirm
#      against the caller.
#
# Returns a tibble with the header row removed. The first column is always
# named "country"; the remaining columns are named after the whitespace-trimmed
# entries of the header row.
convert_to_df <- function(tbl) {
  # Base conversion names unnamed matrix columns V1, V2, ... without the
  # deprecation warnings of as_data_frame(); the names are replaced below.
  raw <- as.data.frame(tbl, stringsAsFactors = FALSE)

  # Header labels as a plain character vector (drop = FALSE keeps the
  # single-column case a data frame).
  header <- trimws(unlist(raw[1, , drop = FALSE], use.names = FALSE))

  body <- raw[-1, , drop = FALSE]
  names(body) <- c("country", header[-1])

  # "minimal" keeps the header labels exactly as extracted (no renaming or
  # uniqueness checks), matching what set_names() did before.
  as_tibble(body, .name_repair = "minimal")
}
# Combine several raw tables into one long-format data frame.
#
# tbls: a list of raw tables, each accepted by convert_to_df().
#
# Returns one row per country/year pair with columns `country` (trimmed
# string), `year` (numeric) and `value` (numeric).
bind_tables <- function(tbls) {
  # Parse every table and stack the results row-wise into one wide frame.
  wide <- bind_rows(map(tbls, convert_to_df))

  # Every column except `country` is a year; fold them into year/value pairs.
  long <- gather(wide, year, value, -country)

  # The extracted cells are strings, so coerce the numeric columns and tidy
  # the labels.
  mutate(
    long,
    year = as.numeric(year),
    value = as.numeric(value),
    country = trimws(country)
  )
}
# Labels for the four transparency measures. They are paired by position with
# the four table groups selected in the main section.
labs <- c(
  "Transparency by Region",
  "Augmented Transparency by Region",
  "Regional Transparency Index (Weighted)",
  "Regional Augmented Transparency Index (Weighted)"
)

# Main
# Download the source PDF to a temporary file and extract every table from it.
url <- "http://eml.berkeley.edu/~eichengr/Dincer-Eichengreen_figures&tables_2014_9-4-15.pdf"
tmp <- tempfile(fileext = ".pdf")
download.file(url, tmp, mode = "wb")
tbls <- tabulizer::extract_tables(tmp)

# Extract relevant data tables (excludes regression tables).
# Tables 1-3 and 4-6 are each combined into one measure; tables 7 and 8 are
# one measure each.
table_groups <- list(1:3, 4:6, 7, 8)
transp <- map(table_groups, function(idx) bind_tables(tbls[idx]))

# Add table labels: tag each group with its measure name and stack the groups.
transp <- map2_df(transp, labs, ~mutate(.x, measure = .y))

# Add country codes. "Euro Area" is given the code "EA" explicitly.
transp <- transp %>%
  mutate(
    iso2c = countrycode(country, "country.name", "iso2c"),
    iso2c = ifelse(country == "Euro Area", "EA", iso2c)
  )

# The label "Australia and New" is a truncated region name, not a country:
# blank whatever code was assigned to it and restore the full name. The mask
# is computed once, before the label is rewritten, so both fixes hit the same
# rows.
is_truncated <- transp$country %in% "Australia and New"
transp$iso2c[is_truncated] <- NA_character_
transp$country[is_truncated] <- "Australia and New Zealand"

# NOTE(review): use_data() moved from devtools to usethis in devtools 2.0;
# if the installed devtools no longer exports it, call usethis::use_data().
devtools::use_data(transp)