personal package with miscellaneous functions, stuff in progress, and tools I use regularly
to install:
# install.packages('devtools')
devtools::install_github('raredd/rawr')
some useful things for ...
kaplan-meier with a whole bunch of extra junk
library('survival')
s <- survfit(Surv(time, status) ~ factor(ph.ecog), cancer)
kmplot(
s,
atrisk.col = TRUE, strata.lab = TRUE,
median = TRUE, hr_text = TRUE, pw_test = TRUE
)
convenience function for survival analysis
kmplot_by(
'factor(ph.ecog)', time = 'time', event = 'status', cancer,
tt_test = TRUE, by = 'sex', strata_lab = FALSE, atrisk.type = 'survival',
atrisk.col = TRUE, median = TRUE, hr_text = TRUE, pw_test = TRUE
)
get the pairwise differences easily
survdiff_pairs(s)
## $n
## 0 1 2 3
## 0 63 NA NA NA
## 1 176 113 NA NA
## 2 113 163 50 NA
## 3 64 114 51 1
##
## $chi.sq
## 0 1 2 3
## 0 NA NA NA NA
## 1 3.456831 NA NA NA
## 2 16.682328 8.433446 NA NA
## 3 5.779873 5.333335 1.24231 NA
##
## $p.value
## 0 1 2 3
## 0 NA 0.1259819 0.0002651 0.0648429
## 1 0.0629909 NA 0.0184191 0.0648429
## 2 0.0000442 0.0036838 NA 0.2650263
## 3 0.0162107 0.0209213 0.2650263 NA
##
## attr(,"class")
## [1] "survdiff_pairs"
make a summary table
combine_table(
surv_table(s),
caption = 'Survival summary'
)
Survival summary | ||||
Time | No. at risk | No. event | Std.Error | Surv (95% CI) |
---|---|---|---|---|
factor(ph.ecog)=0 | ||||
0 | 63 | 0 | 0.000 | 1.000 (1.000, 1.000) |
200 | 50 | 11 | 0.048 | 0.825 (0.736, 0.924) |
400 | 18 | 15 | 0.074 | 0.484 (0.358, 0.654) |
600 | 8 | 6 | 0.075 | 0.309 (0.192, 0.497) |
800 | 4 | 4 | 0.066 | 0.154 (0.067, 0.358) |
1000 | 1 | 1 | 0.064 | 0.077 (0.015, 0.391) |
factor(ph.ecog)=1 | ||||
0 | 113 | 0 | 0.000 | 1.000 (1.000, 1.000) |
200 | 71 | 34 | 0.044 | 0.695 (0.615, 0.786) |
400 | 31 | 26 | 0.051 | 0.405 (0.316, 0.518) |
600 | 13 | 13 | 0.047 | 0.211 (0.136, 0.328) |
800 | 3 | 9 | 0.031 | 0.061 (0.023, 0.164) |
1000 | 1 | 0 | 0.031 | 0.061 (0.023, 0.164) |
factor(ph.ecog)=2 | ||||
0 | 50 | 0 | 0.000 | 1.000 (1.000, 1.000) |
200 | 23 | 25 | 0.072 | 0.485 (0.363, 0.649) |
400 | 8 | 13 | 0.059 | 0.191 (0.104, 0.350) |
600 | 3 | 3 | 0.049 | 0.111 (0.047, 0.266) |
800 | 1 | 2 | 0.034 | 0.037 (0.006, 0.229) |
factor(ph.ecog)=3 | ||||
0 | 1 | 0 | 0.000 | 1.000 (1.000, 1.000) |
box plot + violin plot + dot plot + testing + ...
tplot(
mpg ~ vs + gear, mtcars, test = TRUE,
type = c('dv', 'v', 'd', 'db', 'b', 'd'),
## options for violin plots
quantiles = c(0.25, 0.5, 0.75), lwd = c(1, 2.5, 1)
)
heatmap + row/column matrices + formatting
x <- scale(as.matrix(mtcars))
## row and column colors
rc <- cbind(gear = x[, 'gear'], am = x[, 'am'], vs = x[, 'vs'])
rc[] <- palette()[rc + 2L]
cc <- rbind(var1 = nchar(colnames(x)), var2 = nchar(sort(colnames(x))))
cc[] <- palette()[cc]
heatmap.3(
x, scale = 'column', distfun = 'spearman', hclustfun = 'ward.D2',
RowSideColors = rc, ColSideColors = cc,
labRowCol = rc[, 3], labColCol = cc[1, ],
margins = c(5, 10),
colsep = c(2, 6), rowsep = c(9, 14, 21), sepwidth = c(5, 2)
)
binomial CI forest plot
dat <- mtcars[sample(nrow(mtcars), 100L, TRUE), ]
dat[1, 2] <- NA
vv <- c('cyl', 'vs', 'gear', 'carb')
dat$gear <- factor(dat$gear, 3:6)
bplot(
dat, setNames(vv, case(vv)), 'am',
col = c('red', 'grey50', 'blue', 'grey50', 'blue'),
conf = 0.9, alpha_missing = 0.4
)
tests for doubly- (jonckheere-terpstra) or singly-ordered (kruskal-wallis) tables
tbl <- table(mtcars$gear, mtcars$cyl)
# fisher.test(tbl)
jt.test(tbl)
##
## Jonckheere-Terpstra Test
##
## data: tbl
## z = -3.1551, p-value = 0.001604
kw.test(tbl, simulate.p.value = TRUE)
##
## Test for equality of proportions without continuity correction with simulated p-value (based on 2000 replicates)
##
## data: tbl
## Kruskal-Wallis chi-squared = 16.722, df = 2, p-value < 2.2e-16
## 99 percent confidence interval:
## 0.000000000 0.002299936
test for ordered kruskal-wallis rank-sum
# kruskal.test(mpg ~ cyl, mtcars)
cuzick.test(mpg ~ cyl, mtcars)
##
## Wilcoxon rank-sum test for trend in 3 ordered groups (corrected for ties)
##
## data: mpg by cyl
## z = -5.0741, p-value = 3.894e-07
## sample estimates:
## median of 4 (n=11) median of 6 (n=7) median of 8 (n=14)
## 26.0 19.7 15.2
basically a table
tabler_by2(
mtcars, c('gear', 'vs'), 'cyl',
stratvar = 'am', n = table(mtcars$am),
zeros = '-', pct = TRUE, pct.total = TRUE
)
## vs Total Total 4 6 8 Total 4 6 8
## 3 "0" "12 (38%)" "12 (63%)" "-" "-" "12 (63%)" "-" "-" "-" "-"
## "1" "3 (9%)" "3 (16%)" "1 (5%)" "2 (11%)" "-" "-" "-" "-" "-"
## 4 "0" "2 (6%)" "-" "-" "-" "-" "2 (15%)" "-" "2 (15%)" "-"
## "1" "10 (31%)" "4 (21%)" "2 (11%)" "2 (11%)" "-" "6 (46%)" "6 (46%)" "-" "-"
## 5 "0" "4 (13%)" "-" "-" "-" "-" "4 (31%)" "1 (8%)" "1 (8%)" "2 (15%)"
## "1" "1 (3%)" "-" "-" "-" "-" "1 (8%)" "1 (8%)" "-" "-"
basically a table
tabler_stat2(
within(mtcars, cyl <- factor(cyl, ordered = TRUE)),
c('Miles/gal' = 'mpg', 'Engine (V/S)' = 'vs', Cylinders = 'cyl'),
c('# of gears' = 'gear'), correct = 'BH',
htmlArgs = list(caption = 'Table 1.')
)
Table 1. | ||||||||
# of gears | ||||||||
---|---|---|---|---|---|---|---|---|
Total n = 32 (%) |
3 n = 15 (47) |
4 n = 12 (38) |
5 n = 5 (16) |
p-value | BH p-value | |||
Miles/gal | ||||||||
Median (range) | 19.2 (10.4 - 33.9) | 15.5 (10.4 - 21.5) | 22.8 (17.8 - 33.9) | 19.7 (15.0 - 30.4) | < 0.001^ | 0.002 | ||
Engine (V/S) | ||||||||
Median (range) | 0 (0 - 1) | 0 (0 - 1) | 1 (0 - 1) | 0 (0 - 1) | 0.001‡ | 0.002 | ||
Cylinders | ||||||||
4 | 11 (34) | 1 (7) | 8 (67) | 2 (40) | 0.006§ | 0.006 | ||
6 | 7 (22) | 2 (13) | 4 (33) | 1 (20) | ||||
8 | 14 (44) | 12 (80) | - | 2 (40) | ||||
^Kruskal-Wallis rank-sum test, ‡Fisher's exact test, §Test for trend in proportions |
a table, basically
set.seed(1)
r <- c('CR', 'PR', 'SD', 'PD', 'NE')
x <- factor(sample(r, 30, replace = TRUE), r)
table(x)
## x
## CR PR SD PD NE
## 8 8 4 3 7
t(t(tabler_resp(x, 3:1)))
## [,1]
## CR "8/30, 27% (95% CI: 12 - 46%)"
## PR "8/30, 27% (95% CI: 12 - 46%)"
## SD "4/30, 13% (95% CI: 4 - 31%)"
## PD "3/30, 10% (95% CI: 2 - 27%)"
## NE "7/30, 23% (95% CI: 10 - 42%)"
## SD or better "20/30, 67% (95% CI: 47 - 83%)"
## PR or better "16/30, 53% (95% CI: 34 - 72%)"
## CR or better "8/30, 27% (95% CI: 12 - 46%)"
in-line convenience functions
intr(mtcars$mpg)
intr(mtcars$mpg, conf = 0.95)
countr(mtcars$cyl)
countr(table(mtcars$vs), frac = TRUE)
## [1] "19 (range: 10 - 34)"
## [1] "19 (95% CI: 10 - 33)"
## [1] "4 (n = 11, 34%), 6 (n = 7, 22%), and 8 (n = 14, 44%)"
## [1] "0 (n = 18/32, 56%) and 1 (n = 14/32, 44%)"
for survival analysis
surv_median(s)
surv_median(s, ci = TRUE)
surv_prob(s)
surv_prob(s, times = c(1, 3) * 100, ci = TRUE)
## unexported but useful (log-rank, pair-wise log-rank, tarone trend, wald)
rawr:::lr_pval(s)
rawr:::pw_pval(s)
rawr:::tt_pval(s)
rawr:::hr_pval(s)
## [1] "394, 306, 199, and 118"
## [1] "394 (95% CI: 348 - 574), 306 (95% CI: 268 - 429), 199 (95% CI: 156 - 288), and 118 (95% CI: NR - NR)"
## [1] "1.00, 0.82, 0.48, 0.31, 0.15, and 0.08"
## [1] "0.89 (95% CI: 0.81 - 0.97) and 0.73 (95% CI: 0.62 - 0.85)"
## [1] 6.642535e-05
## 0 vs 1 0 vs 2 0 vs 3 1 vs 2 1 vs 3 2 vs 3
## 0.0629909491 0.0000441907 0.0162107154 0.0036838149 0.0209213160 0.2650263152
## [1] 2.772269e-06
## factor(ph.ecog)1 factor(ph.ecog)2 factor(ph.ecog)3
## 6.335907e-02 4.483837e-05 3.136764e-02
stats
binconr(25, 50)
binconr(25, 50, show_conf = FALSE, frac = TRUE, percent = FALSE)
## length 2 vectors assume two-stage confidence intervals
binconr(c(10, 25), c(20, 30), show_conf = FALSE, frac = TRUE)
## p-values
pvalr(1e-3)
pvalr(1e-8, show.p = TRUE)
## [1] "50% (95% CI: 36 - 64%)"
## attr(,"method")
## [1] "exact"
## [1] "25/50, 0.50 (0.36 - 0.64)"
## attr(,"method")
## [1] "exact"
## [1] "25/50, 50% (29 - 73%)"
## attr(,"method")
## [1] "two-stage"
## [1] "0.001"
## [1] "p < 0.001"
in-line test summaries
x <- mtcars$vs
y <- mtcars$am
# fisher.test(x, y)
inl_fisher(x, y)
inl_fisher(x, y, details = FALSE)
# chisq.test(table(x, y))
inl_chisq(x, y)
inl_chisq(x, y, details = FALSE)
# wilcox.test(mtcars$mpg ~ y)
inl_wilcox(mtcars$mpg, y)
# cuzick.test(mpg ~ gear, mtcars)
inl_cuzick(mtcars$mpg, mtcars$gear)
# jt.test(table(mtcars$gear, mtcars$cyl))
inl_jt(mtcars$gear, mtcars$cyl)
# ca.test(table(mtcars$vs, mtcars$cyl))
inl_ca(mtcars$vs, mtcars$cyl)
## [1] "OR: 1.96 (95% CI: 0.38 - 10.59), Fisher's exact p-value: 0.47"
## [1] "p = 0.47"
## [1] "χ<sup>2</sup>: 0.35 (1 df), chi-squared p-value: 0.56"
## [1] "p = 0.56"
## [1] "w: 42.00, Wilcoxon rank-sum p-value: 0.002"
## [1] "z: 2.69 (3 ordered groups), Cuzick trend p-value: 0.007"
## [1] "z: -3.16, Jonckheere-Terpstra p-value: 0.002"
## [1] "χ<sup>2</sup>: 21.04 (1 df), Cochran-Armitage p-value: < 0.001"
etc
iprint(letters[1:3])
iprint(letters[1:3], copula = ' & ')
num2char(-5)
num2char(134)
## [1] "a, b, and c"
## [1] "a, b & c"
## [1] "Negative five"
## [1] "One hundred thirty-four"