-
Notifications
You must be signed in to change notification settings - Fork 1
/
Meeting2Code.r
221 lines (177 loc) · 6.64 KB
/
Meeting2Code.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
###############
## Slide 2
###############
## Read the ebola time series from the working directory (call setwd() first
## if the CSV lives somewhere else), then preview one corner of the table.
ebola <- read.table(
  file = "country_timeseries.csv",
  header = TRUE,
  sep = ","
)
ebola[seq_len(3), seq_len(5)] # first three rows, first five columns
###############
## Slide 6
###############
## Build a numeric vector and a character vector with c(). Wrapping an
## assignment in parentheses stores the value and shows it in one step.
(x <- c(1, 5, 7, 9, 10))
(class.names <- c("Taylor", "Maggie", "Mimi", "Brianna", "Jon"))
###############
## Slide 8
###############
## Combine the two vectors into a dataframe, one named column per vector.
## The outer parentheses show the new dataframe right after it is stored.
(class.data <- data.frame(name = class.names, number = x))
###############
## Slide 11
###############
## Pull subsets from the class.names vector using square brackets.
## Each line below shows one way of writing the positions to keep.
class.names[1] # a single position: the first element
class.names[c(2, 3, 4)] # several positions, listed one by one with c()
class.names[2:4] # the same positions written as the range 2:4
###############
## Slide 12
###############
## Pull subsets from the class.data dataframe with [row, column] indexing:
## the index before the comma picks rows, the one after it picks columns.
class.data[1,1] # the single value in row 1, column 1
class.data[1:3, 1:2] # rows 1 to 3 of columns 1 and 2
###############
## Slide 13
###############
## Pull whole rows or whole columns from the class.data dataframe:
## leaving one side of the comma empty keeps everything on that side.
class.data[1, ] # row 1, all columns
class.data[ , 1] # all rows of column 1 (comes back as a vector, not a dataframe)
###############
## Slide 14
###############
## Pull subsets from the class.data dataframe, choosing columns by name
## instead of by position.
class.data[3:4, "number"] # rows 3 and 4 of the "number" column
class.data[3:4, c("name", "number")] # rows 3 and 4 of both named columns
###############
## Slide 15
###############
## Pull a single column from the ebola dataframe with `$`, and use head() to
## show only its first few values rather than the whole column.
head(ebola$Cases_Guinea)
###############
## Slide 19
###############
## Call functions with and without their optional arguments
head(x = ebola) # only the required argument; `n` keeps its default
head(ebola, 3) # same function, now asking for three rows
## A call that takes several named arguments: read the CSV file again
ebola <- read.table(
  file = "country_timeseries.csv",
  header = TRUE,
  sep = ","
)
###############
## Slide 22
###############
## Scatterplot of two short numeric vectors.
## The vectors stay named `x` and `y` because plot() labels its axes with the
## expressions it was given.
x <- c(1, 2, 3)
y <- x + 3 # 4, 5, 6
plot(x = x, y = y)
###############
## Slide 22
###############
## Scatterplot of Guinea case counts against day of data collection.
## With no labels supplied, the axes are labelled with the two expressions.
plot(x = ebola$Day, y = ebola$Cases_Guinea)
###############
## Slide 24
###############
## Guinea cases again, now as a thick gray line with a title and axis labels.
## with() lets the column names be used directly inside the plot() call.
with(
  ebola,
  plot(Day, Cases_Guinea,
       type = "l", lwd = 2, col = "gray",
       main = "Guinea ebola cases",
       xlab = "Day in ebola data collection",
       ylab = "# of cases")
)
#####################################################################
## "Try It Yourself" answers
#####################################################################
#######################
## Answers for Slide 16
#######################
## List the column names of the dataframe
names(ebola)
## Extract the "Date" column as a vector (double brackets with the column name
## give the same result as ebola$Date)
ebola[["Date"]]
## To keep the column for later use, save it as a new vector; here the new
## vector is named "ebola.Date"
ebola.Date <- ebola$Date
## Get the ten most recent US case counts. This answer takes rows 1 to 10, so
## it relies on the file listing the most recent report first.
ebola$Cases_UnitedStates[1:10] # or
ebola[1:10, "Cases_UnitedStates"]
## More advanced: to get the ten latest non-missing counts, first drop the rows
## where the US count is NA, then take the top of what is left.
US.ebola <- subset(ebola, !is.na(Cases_UnitedStates))
## head() returns at most ten values. Indexing with [1:10] instead would pad
## the result with NA whenever fewer than ten non-missing rows exist.
head(US.ebola$Cases_UnitedStates, n = 10)
## Get the ten earliest death counts in Liberia. These are the last ten rows,
## because this script treats the first rows as the most recent reports.
## NOTE(review): the exercise wording may have asked for twenty; every answer
## here pulls ten values -- confirm the count against the slide 16 prompt.
nrow(ebola) # Find out how many rows are in the dataset
# The row count depends on when the data were downloaded (it was 94 when this
# answer was first written), so count back from the end of the data instead of
# typing literal row numbers such as 85:94.
tail(ebola$Deaths_Liberia, n = 10) # or
tail(ebola[, "Deaths_Liberia"], n = 10)
# Another way that keeps working when more observations are added to the data
# is to use the "nrow(ebola)" call directly in your indexing:
ebola[(nrow(ebola) - 9):(nrow(ebola)), "Deaths_Liberia"]
# To convince yourself of why this works, check what you get from:
(nrow(ebola) - 9)
(nrow(ebola))
(nrow(ebola) - 9):(nrow(ebola))
## Dataframe with the first five rows of the date, case and death columns
## for Mali
ebola[seq_len(5), c("Date", "Cases_Mali", "Deaths_Mali")]
## The same filtering idea as before gives the non-missing Mali observations:
## keep only the rows whose Mali case count is not NA
Mali.ebola <- ebola[!is.na(ebola$Cases_Mali), , drop = FALSE]
## Only four non-missing Mali observations existed when this answer was
## written, so all rows are shown rather than indexing 1:5
Mali.ebola[, c("Date", "Cases_Mali", "Deaths_Mali")]
#######################
## Answers for Slide 26
#######################
## Deaths in Liberia against day of data collection
## Simplest version, drawn with the default points:
plot(x = ebola$Day, y = ebola$Deaths_Liberia)
## Same data drawn as a connected line:
plot(x = ebola$Day, y = ebola$Deaths_Liberia, type = "l")
## Keep only the rows where both the Liberia case count and the Liberia death
## count are present, and only the four columns needed for the Liberia plots
ebola.Liberia <- ebola[
  !is.na(ebola$Cases_Liberia) & !is.na(ebola$Deaths_Liberia),
  c("Date", "Day", "Cases_Liberia", "Deaths_Liberia"),
  drop = FALSE
]
## Thick gray line of Liberia cases by day (axis labels left at their defaults)
plot(x = ebola.Liberia$Day, y = ebola.Liberia$Cases_Liberia,
     lwd = 2, col = "gray", type = "l")
## The same plot with a title and readable axis labels.
## with() lets the column names be used directly inside the call.
with(
  ebola.Liberia,
  plot(Day, Cases_Liberia,
       main = "Ebola in Liberia",
       xlab = "Day in ebola tracking",
       ylab = "Count",
       type = "l", col = "gray", lwd = 2)
)
## Draw the Liberia death counts as a second line on the plot that is already
## open; see `?lines` for what the lines function can do
with(ebola.Liberia, lines(Day, Deaths_Liberia))
## Plot the percent mortality in Liberia: reported deaths as a percentage of
## reported cases, one value per row of ebola.Liberia.
## The ratio is multiplied by 100 so the values agree with the "% mortality"
## axis label; the bare ratio deaths / cases is a proportion, not a percent.
perc.mort.Liberia <- 100 * ebola.Liberia$Deaths_Liberia /
  ebola.Liberia$Cases_Liberia
plot(ebola.Liberia$Day, perc.mort.Liberia, type = "l",
     xlab = "Day in ebola tracking", ylab = "% mortality",
     main = "Ebola fatality rate in Liberia")
## More advanced: to plot against calendar date, the "Date" column of
## ebola.Liberia has to be of the Date class. Check which class it has now:
class(ebola.Liberia$Date)
## The "as." family of functions converts between classes. Reading the call
## below from the inside out: as.character() first turns the column into plain
## text, then as.Date() turns that text into dates.
## The 'format' option tells R how the date text is laid out: '%m' is the
## month, '%d' the day in the month, and '%Y' the 4-digit year. Because the
## format can be spelled out like this, R can bring in dates written in many
## different ways (e.g., "Jan 1, 2014", "01-Dec-99").
ebola.Liberia$Date <- as.Date(as.character(ebola.Liberia$Date),
                              format = "%m/%d/%Y")
## Now that R knows ebola.Liberia$Date is a date, it can go on the x-axis:
plot(ebola.Liberia$Date, perc.mort.Liberia,
     main = "Ebola fatality rate in Liberia",
     xlab = "Date, 2014", ylab = "% mortality",
     type = "l")
## More advanced: the 'ggplot2' package builds plots by adding pieces together
## with `+`, and can produce very nice results.
## If ggplot2 is not installed yet, uncomment the next line and run it once
#install.packages('ggplot2')
library(ggplot2)
## Line plot of Liberia deaths by date, with both axis labels set in one call
ggplot(
  data = ebola.Liberia,
  mapping = aes(x = Date, y = Deaths_Liberia)
) +
  geom_line() +
  labs(x = "Date, 2014", y = "Death count")
## Just for fun: restyle one plot with themes from the 'ggthemes' package.
## If ggthemes is not installed yet, uncomment the next line and run it once
#install.packages('ggthemes', dependencies = TRUE)
library(ggthemes)
## Store the base plot in `p` so that each theme can be added to it in turn
p <- ggplot(
  data = ebola.Liberia,
  mapping = aes(x = Date, y = Deaths_Liberia)
) +
  geom_line() +
  labs(x = "Date, 2014", y = "Death count")
## Each line below draws `p` again with a different theme added.
## NOTE(review): `p` maps only x and y, so the colour scales presumably leave
## the line itself unchanged -- confirm before relying on them.
p + theme_economist() + scale_colour_economist() # like graphs in The Economist
p + theme_excel() + scale_colour_excel() # like Excel
p + theme_solarized() + scale_colour_solarized("blue") # solarized theme
p + theme_tufte() # Edward Tufte's style