Skip to content

Commit

Permalink
roughly added the design (#6)
Browse files Browse the repository at this point in the history
* roughly added the design

* update design doc

* [skip style] [skip vbump] Restyle files

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
clarkliming and github-actions[bot] authored Apr 1, 2024
1 parent c6f11fc commit 498ee82
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 35 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Date: 2023-06-20
Authors@R:
person("openpharma", , , "[email protected]", role = c("aut", "cre"))
Description: R package template with GitHub Actions workflows included.
License: Apache License 2.0 | file LICENSE
License: Apache License 2.0
URL: https://github.com/openpharma/RobinCar2/
BugReports: https://github.com/openpharma/RobinCar2/issues
Depends:
Expand Down
13 changes: 0 additions & 13 deletions LICENSE

This file was deleted.

21 changes: 0 additions & 21 deletions covadj.Rproj

This file was deleted.

244 changes: 244 additions & 0 deletions design/package_structure/design.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
---
title: "Design Doc of RobinCar2"
author: Liming Li
---

In this doc we will summarize the general design.

For robust inference, there are several key components: unbiased prediction, robust covariance, and delta method.

So the general workflow will be

1. Construct the model.
1. Obtain unbiased prediction, depending on the data
1. Obtain the robust covariance (also with unbiased prediction as input)
1. use delta method to obtain treatment effect and its inference

## Create dummy data

Before we start our model, first we need to prepare our data.

In the following part we create a simple simulation data.
z1 comes from a bernouli distribution of p = 0.5.
z2 comes from a multinomial distribution of p = (0.4, 0.3, 0.3)
cov1 comes from a normal distribution.
trt is based on stratified block randomization, block size is 6, stratified by z1 and z2.
y is the response, the formula is

exp(y) = 0.2 + 0.5 * (trt = "trt1") + 0.7 * (trt = "trt2") + 1.5 * (z1 = "b") - 1 * (z2 = y) - 0.5 * (z2 = z) + 0.2 * cov

```{r}
library(stats)
n <- 200
block_rand <- function(block = c(0, 0, 1, 1)) {
function(n) {
r <- lapply(seq_len(ceiling(n / length(block))), function(i) {
sample(block)
})
unlist(r)[1:n]
}
}
by_strata <- function(f, strata) {
ret <- rep(NA, length(strata))
for (x in split(seq_len(length(strata)), strata)) {
ret[x] <- f(length(x))
}
return(ret)
}
sim_data <- function(n) {
cov1 <- rnorm(n)
z1 <- sample(size = n, factor(c("a", "b")), c(0.5, 0.5), replace = TRUE)
z2 <- sample(size = n, factor(c("x", "y", "z")), c(0.4, 0.3, 0.3), replace = TRUE)
permute_block <- c(0, 0, 1, 1)
trt <- by_strata(block_rand(c("trt1", "trt1", "trt2", "trt2", "pbo", "pbo")), interaction(z1, z2))
trt <- factor(trt, levels = c("pbo", "trt1", "trt2"))
df <- data.frame(trt, z1, z2, cov1)
x_mat <- model.matrix(~ trt + z1 + z2 + cov1, data = df)
coef <- c(0.2, 0.5, 0.7, 1.5, -1, -0.5, 0.2)
theta <- x_mat %*% coef
y <- plogis(theta)
y_bin <- as.integer(runif(n) < y)
df$y <- y_bin
df
}
d <- sim_data(500)
```

## Construct the model

In this part, we will create the model based on the data provided.
Let's begin with binary outcome:

```{r}
fit <- glm(y ~ trt:z1 + trt, family = binomial(), data = d)
```

## Obtain the predictions

```{r}
predict_cf <- function(fit, trt, data, ...) {
UseMethod("predict_cf")
}
predict_cf.glm <- function(fit, trt, data, ...) {
lvls <- fit$xlevels[[trt]]
# data has to be a data.frame
df <- rbind(fit$model, fit$model, fit$model)
df[[trt]] <- factor(rep(lvls, each = nrow(fit$model)), levels = lvls)
preds <- predict(fit, type = "response", newdata = df)
matrix(preds, ncol = length(lvls), dimnames = list(row.names(fit$model), lvls))
}
```

with this `predict_cf` function we already obtain the counterfactual response

```{r}
pred <- predict_cf(fit, "trt")
pred
```

## Obtain the biasness

```{r}
bias <- function(fit, trt, strat, data) {
UseMethod("bias")
}
bias.glm <- function(fit, trt, strat, data) {
trt_var <- fit$model[[trt]]
if (length(strat) != 0) {
strat_var <- fit$data[, strat]
} else {
strat_var <- rep(0L, length(fit$y))
}
residuals <- fit$y - fit$fitted.values
d <- matrix(NA_real_, nrow = length(fit$y), ncol = length(fit$xlevels[[trt]]))
id_strat <- split(seq_len(length(residuals)), strat_var)
for (i in id_strat) {
df <- vapply(split(residuals[i], trt_var[i]), function(xx) mean(xx), FUN.VALUE = 0)
d[i, ] <- matrix(df, nrow = length(i), ncol = length(df), byrow = TRUE)
}
d
}
```

## Ensure the unbiasness

to ensure the unbiasness, the predict_cf need additional argument

```{r}
predict_cf.glm <- function(fit, trt, data, ensure_unbias = TRUE, strata = NULL, ...) {
lvls <- fit$xlevels[[trt]]
# data has to be a data.frame
df <- rbind(fit$model, fit$model, fit$model)
df[[trt]] <- factor(rep(lvls, each = nrow(fit$model)), levels = lvls)
preds <- predict(fit, type = "response", newdata = df)
ret <- matrix(preds, ncol = length(lvls), dimnames = list(row.names(fit$model), lvls))
if (ensure_unbias) {
ret <- ret - bias(fit, trt, strata)
}
ret
}
```

```{r}
pred <- predict_cf(fit, "trt")
```


## Robust variance

```{r}
adjust_pi <- function(pi_t) {
diag(pi_t) - pi_t %*% t(pi_t)
}
get_erb <- function(resi, strata, trt, pit, randomization) {
if (length(strata) == 0) {
return(0)
}
if (randomization %in% c("simple", "pocock-simon")) {
return(0)
}
# Calculate Omega Z under simple
omegaz_sr <- adjust_pi(pit)
idx <- split(seq_len(length(resi)), cbind(trt, strata))
resi_per_strata <- vapply(idx, function(ii) mean(resi[ii]), FUN.VALUE = 0)
# Calculate strata levels and proportions for
# the outer expectation
strata_props <- vapply(idx, length, FUN.VALUE = 0L)
strata_props <- strata_props / sum(strata_props)
# Estimate R(B) by first getting the conditional expectation
# vector for a particular strata (vector contains
# all treatment groups), then dividing by the pi_t
rb_z <- resi_per_strata / as.numeric(pit)
# Compute the R(B)[Omega_{SR} - Omega_{Z_i}]R(B) | Z_i
# for each Z_i
strata_levels <- length(resi_per_strata)
n_trt <- length(pit)
ind <- matrix(seq_len(strata_levels), byrow = TRUE, ncol = n_trt)
rb_z_sum <- lapply(
seq_len(nrow(ind)),
function(x) rb_z[ind[x, ]] %*% t(rb_z[ind[x, ]]) * sum(strata_props[ind[x, ]])
)
rb_z_sum <- Reduce(`+`, rb_z_sum)
rb_z_sum * omegaz_sr
}
vcov_robin <- function(fit, ...) {
UseMethod("vcov_robin")
}
are <- function(objs, cls) {
all(vapply(objs, is, class2 = cls, FUN.VALUE = TRUE))
}
vcov_robin.glm <- function(fit, trt, strat = NULL, preds = predict_cf(fit, trt, strat), sr_decompose = TRUE, randomization = "simple", ...) {
raw_data <- fit$data
resi <- residuals(fit, type = "response")
est <- colMeans(preds)
var_preds <- var(preds)
pit <- as.numeric(table(raw_data[[trt]]) / nrow(raw_data))
trt_lvls <- fit$xlevels[[trt]]
idx <- split(seq_len(nrow(raw_data)), raw_data[[trt]])
cov_ymu <- vapply(idx, function(is) stats::cov(fit$y[is], preds[is, ]), FUN.VALUE = rep(0, ncol(preds)))
vcov_sr <- vapply(idx, function(is) stats::var(fit$y[is]), FUN.VALUE = 0) / pit
if (sr_decompose) {
vcov_sr <- vcov_sr + (diag(var_preds) - 2 * diag(cov_ymu)) / pit
}
v <- diag(vcov_sr) + cov_ymu + t(cov_ymu) - var_preds
if (!randomization %in% c("simple", "pocock-simon") && length(strat) > 0) {
v <- v - get_erb(resi, raw_data[strat], raw_data[[trt]], pit, randomization)
}
ret <- v / length(resi)
dimnames(ret) <- list(trt_lvls, trt_lvls)
return(ret)
}
vcov_robin(fit, "trt", "z1")
```


## robincar_glm wrapper

```{r}
robincar_glm2 <- function(data, formula, trt, strata, car_scheme, ...) {
fit <- glm(formula = formula, data = data, ...)
pred <- predict_cf(fit, trt, ensure_unbias = TRUE)
var <- vcov_robin(fit, trt = trt, strat = strata, randomization = car_scheme, preds = pred)
list(
pred = colMeans(pred),
var = var
)
}
robincar_glm(d, formula = y ~ z1:trt + trt, "trt", "z1", "coin")
```

potentially, there can be other arguments just like `RobinCar::robincar_glm`.
The formula can be inferred from the choices of "ANOVA", "ANCOVA", "ANHECOVA", etc. if not provided.

All these functions can be exported for direct usage, and also the wrapper is provided.

0 comments on commit 498ee82

Please sign in to comment.