-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* roughly added the design * update design doc * [skip style] [skip vbump] Restyle files --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
c6f11fc
commit 498ee82
Showing
4 changed files
with
245 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ Date: 2023-06-20 | |
Authors@R: | ||
person("openpharma", , , "[email protected]", role = c("aut", "cre")) | ||
Description: R package template with GitHub Actions workflows included. | ||
License: Apache License 2.0 | file LICENSE | ||
License: Apache License 2.0 | ||
URL: https://github.com/openpharma/RobinCar2/ | ||
BugReports: https://github.com/openpharma/RobinCar2/issues | ||
Depends: | ||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
--- | ||
title: "Design Doc of RobinCar2" | ||
author: Liming Li | ||
--- | ||
|
||
In this doc we will summarize the general design. | ||
|
||
For robust inference, there are several key components: unbiased prediction, robust covariance, and delta method. | ||
|
||
So the general workflow will be | ||
|
||
1. Construct the model. | ||
1. Obtain unbiased prediction, depending on the data | ||
1. Obtain the robust covariance (also with unbiased prediction as input) | ||
1. use delta method to obtain treatment effect and its inference | ||
|
||
## Create dummy data | ||
|
||
Before we start our model, first we need to prepare our data. | ||
|
||
In the following part we create a simple simulation data. | ||
z1 comes from a bernouli distribution of p = 0.5. | ||
z2 comes from a multinomial distribution of p = (0.4, 0.3, 0.3) | ||
cov1 comes from a normal distribution. | ||
trt is based on stratified block randomization, block size is 6, stratified by z1 and z2. | ||
y is the response, the formula is | ||
|
||
exp(y) = 0.2 + 0.5 * (trt = "trt1") + 0.7 * (trt = "trt2") + 1.5 * (z1 = "b") - 1 * (z2 = y) - 0.5 * (z2 = z) + 0.2 * cov | ||
|
||
```{r} | ||
library(stats) | ||
n <- 200 | ||
block_rand <- function(block = c(0, 0, 1, 1)) { | ||
function(n) { | ||
r <- lapply(seq_len(ceiling(n / length(block))), function(i) { | ||
sample(block) | ||
}) | ||
unlist(r)[1:n] | ||
} | ||
} | ||
by_strata <- function(f, strata) { | ||
ret <- rep(NA, length(strata)) | ||
for (x in split(seq_len(length(strata)), strata)) { | ||
ret[x] <- f(length(x)) | ||
} | ||
return(ret) | ||
} | ||
sim_data <- function(n) { | ||
cov1 <- rnorm(n) | ||
z1 <- sample(size = n, factor(c("a", "b")), c(0.5, 0.5), replace = TRUE) | ||
z2 <- sample(size = n, factor(c("x", "y", "z")), c(0.4, 0.3, 0.3), replace = TRUE) | ||
permute_block <- c(0, 0, 1, 1) | ||
trt <- by_strata(block_rand(c("trt1", "trt1", "trt2", "trt2", "pbo", "pbo")), interaction(z1, z2)) | ||
trt <- factor(trt, levels = c("pbo", "trt1", "trt2")) | ||
df <- data.frame(trt, z1, z2, cov1) | ||
x_mat <- model.matrix(~ trt + z1 + z2 + cov1, data = df) | ||
coef <- c(0.2, 0.5, 0.7, 1.5, -1, -0.5, 0.2) | ||
theta <- x_mat %*% coef | ||
y <- plogis(theta) | ||
y_bin <- as.integer(runif(n) < y) | ||
df$y <- y_bin | ||
df | ||
} | ||
d <- sim_data(500) | ||
``` | ||
|
||
## Construct the model | ||
|
||
In this part, we will create the model based on the data provided. | ||
Let's begin with binary outcome: | ||
|
||
```{r} | ||
fit <- glm(y ~ trt:z1 + trt, family = binomial(), data = d) | ||
``` | ||
|
||
## Obtain the predictions | ||
|
||
```{r} | ||
predict_cf <- function(fit, trt, data, ...) { | ||
UseMethod("predict_cf") | ||
} | ||
predict_cf.glm <- function(fit, trt, data, ...) { | ||
lvls <- fit$xlevels[[trt]] | ||
# data has to be a data.frame | ||
df <- rbind(fit$model, fit$model, fit$model) | ||
df[[trt]] <- factor(rep(lvls, each = nrow(fit$model)), levels = lvls) | ||
preds <- predict(fit, type = "response", newdata = df) | ||
matrix(preds, ncol = length(lvls), dimnames = list(row.names(fit$model), lvls)) | ||
} | ||
``` | ||
|
||
with this `predict_cf` function we already obtain the counterfactual response | ||
|
||
```{r} | ||
pred <- predict_cf(fit, "trt") | ||
pred | ||
``` | ||
|
||
## Obtain the biasness | ||
|
||
```{r} | ||
bias <- function(fit, trt, strat, data) { | ||
UseMethod("bias") | ||
} | ||
bias.glm <- function(fit, trt, strat, data) { | ||
trt_var <- fit$model[[trt]] | ||
if (length(strat) != 0) { | ||
strat_var <- fit$data[, strat] | ||
} else { | ||
strat_var <- rep(0L, length(fit$y)) | ||
} | ||
residuals <- fit$y - fit$fitted.values | ||
d <- matrix(NA_real_, nrow = length(fit$y), ncol = length(fit$xlevels[[trt]])) | ||
id_strat <- split(seq_len(length(residuals)), strat_var) | ||
for (i in id_strat) { | ||
df <- vapply(split(residuals[i], trt_var[i]), function(xx) mean(xx), FUN.VALUE = 0) | ||
d[i, ] <- matrix(df, nrow = length(i), ncol = length(df), byrow = TRUE) | ||
} | ||
d | ||
} | ||
``` | ||
|
||
## Ensure the unbiasness | ||
|
||
to ensure the unbiasness, the predict_cf need additional argument | ||
|
||
```{r} | ||
predict_cf.glm <- function(fit, trt, data, ensure_unbias = TRUE, strata = NULL, ...) { | ||
lvls <- fit$xlevels[[trt]] | ||
# data has to be a data.frame | ||
df <- rbind(fit$model, fit$model, fit$model) | ||
df[[trt]] <- factor(rep(lvls, each = nrow(fit$model)), levels = lvls) | ||
preds <- predict(fit, type = "response", newdata = df) | ||
ret <- matrix(preds, ncol = length(lvls), dimnames = list(row.names(fit$model), lvls)) | ||
if (ensure_unbias) { | ||
ret <- ret - bias(fit, trt, strata) | ||
} | ||
ret | ||
} | ||
``` | ||
|
||
```{r} | ||
pred <- predict_cf(fit, "trt") | ||
``` | ||
|
||
|
||
## Robust variance | ||
|
||
```{r} | ||
adjust_pi <- function(pi_t) { | ||
diag(pi_t) - pi_t %*% t(pi_t) | ||
} | ||
get_erb <- function(resi, strata, trt, pit, randomization) { | ||
if (length(strata) == 0) { | ||
return(0) | ||
} | ||
if (randomization %in% c("simple", "pocock-simon")) { | ||
return(0) | ||
} | ||
# Calculate Omega Z under simple | ||
omegaz_sr <- adjust_pi(pit) | ||
idx <- split(seq_len(length(resi)), cbind(trt, strata)) | ||
resi_per_strata <- vapply(idx, function(ii) mean(resi[ii]), FUN.VALUE = 0) | ||
# Calculate strata levels and proportions for | ||
# the outer expectation | ||
strata_props <- vapply(idx, length, FUN.VALUE = 0L) | ||
strata_props <- strata_props / sum(strata_props) | ||
# Estimate R(B) by first getting the conditional expectation | ||
# vector for a particular strata (vector contains | ||
# all treatment groups), then dividing by the pi_t | ||
rb_z <- resi_per_strata / as.numeric(pit) | ||
# Compute the R(B)[Omega_{SR} - Omega_{Z_i}]R(B) | Z_i | ||
# for each Z_i | ||
strata_levels <- length(resi_per_strata) | ||
n_trt <- length(pit) | ||
ind <- matrix(seq_len(strata_levels), byrow = TRUE, ncol = n_trt) | ||
rb_z_sum <- lapply( | ||
seq_len(nrow(ind)), | ||
function(x) rb_z[ind[x, ]] %*% t(rb_z[ind[x, ]]) * sum(strata_props[ind[x, ]]) | ||
) | ||
rb_z_sum <- Reduce(`+`, rb_z_sum) | ||
rb_z_sum * omegaz_sr | ||
} | ||
vcov_robin <- function(fit, ...) { | ||
UseMethod("vcov_robin") | ||
} | ||
are <- function(objs, cls) { | ||
all(vapply(objs, is, class2 = cls, FUN.VALUE = TRUE)) | ||
} | ||
vcov_robin.glm <- function(fit, trt, strat = NULL, preds = predict_cf(fit, trt, strat), sr_decompose = TRUE, randomization = "simple", ...) { | ||
raw_data <- fit$data | ||
resi <- residuals(fit, type = "response") | ||
est <- colMeans(preds) | ||
var_preds <- var(preds) | ||
pit <- as.numeric(table(raw_data[[trt]]) / nrow(raw_data)) | ||
trt_lvls <- fit$xlevels[[trt]] | ||
idx <- split(seq_len(nrow(raw_data)), raw_data[[trt]]) | ||
cov_ymu <- vapply(idx, function(is) stats::cov(fit$y[is], preds[is, ]), FUN.VALUE = rep(0, ncol(preds))) | ||
vcov_sr <- vapply(idx, function(is) stats::var(fit$y[is]), FUN.VALUE = 0) / pit | ||
if (sr_decompose) { | ||
vcov_sr <- vcov_sr + (diag(var_preds) - 2 * diag(cov_ymu)) / pit | ||
} | ||
v <- diag(vcov_sr) + cov_ymu + t(cov_ymu) - var_preds | ||
if (!randomization %in% c("simple", "pocock-simon") && length(strat) > 0) { | ||
v <- v - get_erb(resi, raw_data[strat], raw_data[[trt]], pit, randomization) | ||
} | ||
ret <- v / length(resi) | ||
dimnames(ret) <- list(trt_lvls, trt_lvls) | ||
return(ret) | ||
} | ||
vcov_robin(fit, "trt", "z1") | ||
``` | ||
|
||
|
||
## robincar_glm wrapper | ||
|
||
```{r} | ||
robincar_glm2 <- function(data, formula, trt, strata, car_scheme, ...) { | ||
fit <- glm(formula = formula, data = data, ...) | ||
pred <- predict_cf(fit, trt, ensure_unbias = TRUE) | ||
var <- vcov_robin(fit, trt = trt, strat = strata, randomization = car_scheme, preds = pred) | ||
list( | ||
pred = colMeans(pred), | ||
var = var | ||
) | ||
} | ||
robincar_glm(d, formula = y ~ z1:trt + trt, "trt", "z1", "coin") | ||
``` | ||
|
||
potentially, there can be other arguments just like `RobinCar::robincar_glm`. | ||
The formula can be inferred from the choices of "ANOVA", "ANCOVA", "ANHECOVA", etc. if not provided. | ||
|
||
All these functions can be exported for direct usage, and also the wrapper is provided. |