-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathms.TZ_figures.R
328 lines (275 loc) · 16.6 KB
/
ms.TZ_figures.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# This script will produce all figures and tables given in the manuscript, saved
# in the figs/ and output/paper/ folders respectively.
rm(list=ls())
# Load in libraries
library(dplyr)
library(ggplot2)
library(lmerTest)
library(lme4)
# Load functions
source("R/mixture_model.R")
source("R/TZ_p_multiple.bites.R")
# Set palette for all figures
fig_palette <- c("#0072B2", "#E69F00")
################
# 1. Load data #
################
# Load data from output folder
CT_data <- read.csv("output/CT_data.csv", stringsAsFactors=FALSE)
CT_data_rabid <- read.csv("output/CT_data_rabid.csv", stringsAsFactors=FALSE)
HC_bite_summary <- read.csv("output/HC_bite_summary_patients.csv", stringsAsFactors=FALSE)
infect_results <- read.csv("output/p_infect.csv", stringsAsFactors=FALSE)
model_results <- read.csv("output/p_infect_model.csv", stringsAsFactors=FALSE)
HC_data <- read.csv("output/HC_data.csv", stringsAsFactors=FALSE)
# Limit contact tracing data to Serengeti & Ngorongoro
ser_ngor <- CT_data[which(CT_data$District %in% c("Serengeti", "Ngorongoro")),]
ser_ngor_rabid <- ser_ngor[which(ser_ngor$Rabid=="Yes"),]
###################################################################
# Figure 1: Variation in annual incidence of patient presentation #
###################################################################
# Set the order and District factor by dog population
boxplot_data <- HC_bite_summary[order(HC_bite_summary$Dogs),]
row.names(boxplot_data) <- NULL
levsord <- unique(boxplot_data$District)
boxplot_data$District <- factor(boxplot_data$District, levels = levsord, ordered = TRUE)
boxplot_data <- boxplot_data[with(boxplot_data, order(HDR)), ]
boxplot_data$District <- factor(boxplot_data$District, levels = unique(boxplot_data$District))
# Produce Figure 1
pdf("figs/Bite_incidence_Boxplot.pdf", width=10, height=8)
ggplot(boxplot_data, aes(x=District, y=bites_per_pop_per_100000, fill=Setting)) +
theme_classic()+
geom_boxplot(position=position_dodge(0.9)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_point(shape=21, size=2, position=position_dodge(0.9)) +
stat_summary(fun.y=mean, geom="point", size = 2, colour="black", position=position_dodge(0.9)) +
labs(x = "District", y="Incidence of bite patients per 100,000") +
scale_fill_manual(name="Setting", values=fig_palette) + ylim(0,165) +
geom_segment(aes(x=1, y=160, xend=length(unique(boxplot_data$District)), yend=160),
arrow = arrow(ends="both", length=unit(0.5, "cm"))) +
annotate("text", x=3.5, y=155, label=paste0("Low HDR (", round(min(boxplot_data$HDR), digits=1), ")")) +
annotate("text", x=length(unique(boxplot_data$District))-2.5, y=155,
label=paste0("High HDR (", round(max(boxplot_data$HDR), digits=1), ")"))
dev.off()
###############################################################
# Table 1: Probable rabies exposures and deaths for bite site #
###############################################################
# Use mixture model to calculate overall probability of infection
quantiles_list <- get_quantiles(model_results)
# Produce table
final_infect_df <- data.frame(Bite_loc = infect_results$Bite.loc,
Order = infect_results$p_deaths,
P_death = paste0(round(infect_results$p_deaths, digits=3), " (", round(infect_results$P_death_LCI, digits=3), "-", round(infect_results$P_death_UCI, digits=3), ")"),
N_death = infect_results$n_deaths,
N_no_pep = infect_results$n_no.pep,
P_bite = quantiles_list$p_bite_site,
N_rabid_bites = infect_results$n_bite.from.rabid.animal,
stringsAsFactors = FALSE)
# Use function to calculate quantiles
quantiles_list <- get_quantiles(model_results)
# Create new row for totals
new_row <- data.frame(Bite_loc = "", Order = "", P_death = quantiles_list$p_infect_overall,
N_death = sum(final_infect_df$N_death),
N_no_pep = sum(final_infect_df$N_no_pep), P_bite = "",
N_rabid_bites = sum(final_infect_df$N_rabid_bites),
stringsAsFactors = FALSE)
final_infect_df <- rbind(final_infect_df, new_row)
# Re-order dataframe
final_infect_df <- final_infect_df[order(final_infect_df$Order, decreasing=TRUE),]
final_infect_df$Order <- NULL
# Update column names
colnames(final_infect_df) <- c("Bite location",
"Probability of death (95% CI)", "Number of deaths",
"Number of bite victims that did not receive PEP",
"Probability of bite site (95% CI)", "Number of suspect rabies exposures")
# Save output
write.csv(final_infect_df, "output/paper/Probablity_of_death.csv", row.names=FALSE)
###########################################################################
# Table 2: Details of human deaths where late/incomplete PEP was received #
###########################################################################
# Subset data for individuals that died of Rabies, with either incomplete or late PEP
deaths_details <- CT_data_rabid[which(CT_data_rabid$Patient.outcome=="Died"),]
deaths_details <- deaths_details[which(deaths_details$complete=="incomplete" | deaths_details$late.PEP=="late"),]
# Create table
deaths_details <- data.frame(PEP_Failure=NA, Region=deaths_details$Region,
Age=deaths_details$Age..in.years., Sex=deaths_details$Sex,
PEP=deaths_details$PEP.administration,
n_pep_received=deaths_details$n.PEP.recieved,
delay_to_PEP.1=deaths_details$delay.pep.1,
n_bites=deaths_details$n.bite.locs, Head=deaths_details$Head,
Arm=deaths_details$Arm, Hand=deaths_details$Hand,
Trunk=deaths_details$Trunk, Leg=deaths_details$Leg,
Foot=deaths_details$Foot, Bite_loc=NA)
# Run loop to find where on the body each individual was bitten, saving results
# into a vector and then pasting into single string
for(i in 1:nrow(deaths_details)){
bite_loc_string <- c()
if(deaths_details$Head[i]==1){
bite_loc_string <- c(bite_loc_string, "Head")
}
if(deaths_details$Arm[i]==1){
bite_loc_string <- c(bite_loc_string, "Arm")
}
if(deaths_details$Hand[i]==1){
bite_loc_string <- c(bite_loc_string, "Hand")
}
if(deaths_details$Trunk[i]==1){
bite_loc_string <- c(bite_loc_string, "Trunk")
}
if(deaths_details$Leg[i]==1){
bite_loc_string <- c(bite_loc_string, "Leg")
}
if(deaths_details$Foot[i]==1){
bite_loc_string <- c(bite_loc_string, "Foot")
}
deaths_details$Bite_loc[i] <- toString(bite_loc_string)
}
# Run loop to give detail on PEP Failure & PEP route (ID/IM)
deaths_details$PEP <- as.character(deaths_details$PEP)
for(i in 1:nrow(deaths_details)){
if(deaths_details$n_pep_received[i]>2 & deaths_details$delay_to_PEP.1[i]>0){
deaths_details$PEP_Failure[i] <- "Delayed"
} else if(deaths_details$n_pep_received[i]<3 & deaths_details$delay_to_PEP.1[i]==0){
deaths_details$PEP_Failure[i] <- "Incomplete"
} else if(deaths_details$n_pep_received[i]<3 & deaths_details$delay_to_PEP.1[i]>0){
deaths_details$PEP_Failure[i] <- "Delayed & Incomplete"
}
if(deaths_details$Region[i] %in% c("Mara", "Arusha")){
deaths_details$PEP[i] <- "IM"
} else if(is.na(deaths_details$PEP[i]) | deaths_details$PEP[i]=="IV"){
deaths_details$PEP[i] <- "ID"
}
}
# Select only required rows
deaths_details <- dplyr::select(deaths_details, PEP_Failure, Age, Sex, n_pep_received,
PEP, delay_to_PEP.1, n_bites, Bite_loc)
# Sort by Number of PEP doses, then number of days delay
deaths_details <- deaths_details[with(deaths_details, order(-n_pep_received, -delay_to_PEP.1)), ]
# Save output
write.csv(deaths_details, "output/paper/Death_details.csv", row.names=FALSE)
##################################################
# Figure 2A: Delay between bite date & PEP 1 Hist #
##################################################
# Remove any delays outside of range (i.e. outliers)
summary(CT_data_rabid$delay.pep.1)
barplot_df <- CT_data_rabid[which(CT_data_rabid$delay.pep.1>=0 & CT_data_rabid$delay.pep.1<91),]
# Change "Other" patient outcomes to "Fine"
barplot_df$Patient.outcome[which(barplot_df$Patient.outcome=="Other")] <- "Fine"
# Seperate off NTz and STz
NTz_barplot <- barplot_df[which(barplot_df$Study.Area=="NTz"),]; nrow(NTz_barplot)
STz_barplot <- barplot_df[which(barplot_df$Study.Area %in% c("STz", "Pemba")),]; nrow(STz_barplot)
# NTZ: Find counts per day delay, create dataframe and transform to matrix
NTz_delays <- hist(NTz_barplot$delay.pep.1, breaks=c(seq(-1, 14, by=1), 21, 35, 64, 84))
NTz_barplot.df <- data.frame(Delay=NTz_delays$breaks[2:20],
N_Fine=NTz_delays$counts,
Location=rep("NTz", 19))
NTz_barplot.df$Prop = NTz_barplot.df$N_Fine/sum(NTz_barplot.df$N_Fine)
NTz_barplot.df$Delay <- factor(NTz_barplot.df$Delay, levels=sort(unique(NTz_barplot.df$Delay)))
# STZ: Find counts per day delay, create dataframe and transform to matrix
STz_delays <- hist(STz_barplot$delay.pep.1, breaks=c(seq(-1, 14, by=1), 21, 35, 64, 84))
STz_barplot.df <- data.frame(Delay=STz_delays$breaks[2:20],
N_Fine=STz_delays$counts,
Location=rep("STz", 19))
STz_barplot.df$Prop = STz_barplot.df$N_Fine/sum(STz_barplot.df$N_Fine)
STz_barplot.df$Delay <- factor(STz_barplot.df$Delay, levels=sort(unique(STz_barplot.df$Delay)))
# Bind together
barplot.df <- rbind(NTz_barplot.df, STz_barplot.df)
# Create plot
pdf("figs/delay_in_pep_V3.pdf", width=10, height=6)
ggplot(data=barplot.df, aes(x=Delay, y=Prop, fill=Location)) +
geom_col(position=position_dodge(), color="black") + theme_classic() + ylim(0,1) +
labs(x="Delay between exposure and initiation of PEP",
y="Proportion that presented at a health facility") +
scale_fill_manual(name="", breaks=c("NTz", "STz"),
labels=c("Serengeti & Ngorongoro", "Southern Tanzania"),
values=fig_palette) +
theme(legend.position="top",
text = element_text(size=12),
axis.text.x = element_text(angle=45, hjust=1)) +
scale_x_discrete(breaks=c(0,7,14,21,35,64,84),
labels=c("0 days", "7 days", "14 days", "2-3 weeks", "3-4 weeks", "1-2 months", ">2 months")) +
geom_text(label="A)", aes(x=-Inf, y=Inf, hjust=-1, vjust=1), size=7)
dev.off()
#####################################################
# Figure 2B - Barplot of proportion doses receieved #
#####################################################
# Exclude 'patients' that arrived at hospital with Rabies symptoms
HC_data_sub <- HC_data[which(HC_data$visit_status>0),]
# Find number of patients where PEP was available upon arrival for treatment
hc_visits <- HC_data_sub %>%
group_by(visit_status) %>%
summarise(n_bite_victims = length(which(is_antirabies_available=="yes")))
# Calculate proportion of individuals that received PEP
hc_visits$p_receive <- hc_visits$n_bite_victims/length(which(HC_data_sub$visit_status==1))
hc_visits$PEP_available <- "yes"
hc_visits$data <- "MS"
# Subset for individuals in CT data that sought healthcare
CT_barplot <- ser_ngor_rabid[which(ser_ngor_rabid$Sought==1),]
# Find number of individuals that received each number of PEP doses
ct_doses <- data.frame(visit_status=1:5,
n_bite_victims=c(length(which(CT_barplot$n.PEP.recieved>0)),
length(which(CT_barplot$n.PEP.recieved>1 & CT_barplot$PEP.1==1)),
length(which(CT_barplot$n.PEP.recieved>2 & CT_barplot$PEP.1==1)),
length(which(CT_barplot$n.PEP.recieved>3 & CT_barplot$PEP.1==1)),
length(which(CT_barplot$n.PEP.recieved>4 & CT_barplot$PEP.1==1))))
# Calculate proportion of individuals that received PEP
ct_doses$p_receive <- ct_doses$n_bite_victims/nrow(CT_barplot)
ct_doses$PEP_available <- "yes"
ct_doses$data <- "CT"
# Bind dataframes together & set factor levels
barplot_data <- rbind(hc_visits, ct_doses)
barplot_data$data <- factor(barplot_data$data, levels = c("CT", "MS"))
# Create plot
pdf("figs/Proportion_PEP_completion.pdf", width=10, height=6)
ggplot(data=barplot_data, aes(x=visit_status, y=p_receive, fill=data)) +
geom_bar(stat="identity", position=position_dodge(), colour="black") +
theme_classic() + ylim(0,1) +
scale_fill_manual(name="", values=fig_palette,
breaks=c("CT", "MS"), labels=c("Serengeti & Ngorongoro", "Southern Tanzania")) +
labs(x="PEP doses received", y="Proportion of patients that received PEP") +
theme(legend.position="top",
text = element_text(size=12)) +
geom_text(label="B)", aes(x=-Inf, y=Inf, hjust=-1, vjust=1), size=7)
dev.off()
###############################
# Create output for schematic #
###############################
# Remove "sub system admin" - a Clinic name that does has not been matched
HC_sub <- HC_data[which(HC_data$clinic_name!="sub system admin"),]
# Remove patients that did not receive PEP
HC_sub <- HC_sub[which(HC_sub$visit_status!=0 & HC_sub$is_antirabies_available!="no"),]
# Collect unique patient trips i.e. 1 row for a patient that visited 1 facility multiple times,
# vs. 3 rows for a patient that visited 3 facilities
unique_trips <- unique(dplyr::select(HC_sub, Patient_id,
Correct_patient_region, Correct_patient_district,
Correct_clinic_region, Correct_clinic_district, clinic_name))
# Summarise number of facilities visited by each individuals
n_facilities_visited <- unique_trips %>%
group_by(Patient_id) %>%
summarise(n_facilities=length(Patient_id))
# Collect total number of visits where home and clinic location info is available
length(which(is.na(HC_sub$Correct_patient_district) | is.na(HC_sub$Correct_patient_region) | is.na(HC_sub$Correct_clinic_district) | is.na(HC_sub$Correct_clinic_region)))
n_total_visits = nrow(HC_sub)
# For individuals that travelled to one clinic
one_clinic_ind <- n_facilities_visited$Patient_id[which(n_facilities_visited$n_facilities==1)]
one_clinic <- HC_sub[which(HC_sub$Patient_id %in% one_clinic_ind),]
# For individuals that travelled to multiple clinics
multiple_clin_ind <- n_facilities_visited$Patient_id[which(n_facilities_visited$n_facilities>1)]
multiple_clin <- HC_sub[which(HC_sub$Patient_id %in% multiple_clin_ind),]
# Create dataframe
one_n_same_district = length(which(one_clinic$Correct_patient_district==one_clinic$Correct_clinic_district & one_clinic$Correct_patient_region==one_clinic$Correct_clinic_region))
one_n_same_region = length(which(one_clinic$Correct_patient_district!=one_clinic$Correct_clinic_district & one_clinic$Correct_patient_region==one_clinic$Correct_clinic_region))
one_n_diff_region = length(which(one_clinic$Correct_patient_region!=one_clinic$Correct_clinic_region))
one_clinic_summary <- data.frame(category="1 clinic", n_total=nrow(one_clinic),
n_inside_home_dis=one_n_same_district, per_inside_home_dis=round((one_n_same_district/n_total_visits)*100, digits=1),
n_outside_home_dis=one_n_same_region, per_outside_home_dis=round((one_n_same_region/n_total_visits)*100, digits=1),
n_outisde_home_reg=one_n_diff_region, per_outside_home_reg=round((one_n_diff_region/n_total_visits)*100, digits=1))
mult_n_same_district = length(which(multiple_clin$Correct_patient_district==multiple_clin$Correct_clinic_district & multiple_clin$Correct_patient_region==multiple_clin$Correct_clinic_region))
mult_n_same_region = length(which(multiple_clin$Correct_patient_district!=multiple_clin$Correct_clinic_district & multiple_clin$Correct_patient_region==multiple_clin$Correct_clinic_region))
mult_n_diff_region = length(which(multiple_clin$Correct_patient_region!=multiple_clin$Correct_clinic_region))
mult_clinic_summary <- data.frame(category=">1 clinic", n_total=nrow(multiple_clin),
n_inside_home_dis=mult_n_same_district, per_inside_home_dis=round((mult_n_same_district/n_total_visits)*100, digits=1),
n_outside_home_dis=mult_n_same_region, per_outside_home_dis=round((mult_n_same_region/n_total_visits)*100, digits=1),
n_outisde_home_reg=mult_n_diff_region, per_outside_home_reg=round((mult_n_diff_region/n_total_visits)*100, digits=1))
overall_clinic_summary <- rbind(one_clinic_summary, mult_clinic_summary)
# Save output
write.csv(overall_clinic_summary, "output/paper/clinic_visits_summary.csv", row.names=FALSE)