code.Rmd

---
title: "Code Appendix - Childhood Asthma Management Program Project - BSTA 519"
author: "Matthew Hoctor, Bryon Langford"
date: "11/30/2021"
output:
  html_document:
    number_sections: no
    theme: lumen
    toc: yes
    toc_float:
      collapsed: yes
      smooth_scroll: no
  pdf_document:
    toc: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(readxl)
library(tidyverse)
library(ggplot2)
library(psych)
# library(CarletonStats)
# library(pwr)
# library(BSDA)
# library(exact2x2)
# library(car)
# library(dvmisc)
# library(emmeans)
# library(gridExtra)
# library(DescTools)
# library(DiagrammeR)
library(nlme)
library(lme4)
library(doBy)
library(multcomp)
library(mgcv)
# library(geepack)
# library(rje)
library(gtsummary)
# library(parameters)
```

# CAMP dataset

## Data import

Import the CAMP dataset:

```{r}
# Load the CAMP class-project workbook into `camp`.
camp <- readxl::read_excel(path = "camp_2021 class project.xlsx")
```

## Filling missing values

Fill missing covariate values and baseline:

```{r}
# Fill missing covariate and FFratio values within each subject only.
# Grouping by `id` keeps fill() from carrying one subject's value into the
# next subject's rows; within a subject values are carried down, then up.
camp <- camp %>%
  group_by(id) %>%
  fill(
    all_of(c("Anypet", "Woodstove", "Dehumid", "Parent_smokes", "FFratio")),
    .direction = "downup"
  ) %>%
  ungroup()
```

## Factoring & re-leveling

Relevel Treatment groups:

```{r}
# Replace the treatment letter codes with digit-prefixed labels; the digit
# prefixes put placebo first in alphabetical order.
trt_labels <- c(A = "2budesonide", B = "1nedocromil", C = "0placebo")
for (trt_code in names(trt_labels)) {
  camp$Trtment[camp$Trtment == trt_code] <- trt_labels[[trt_code]]
}
```

Factor gender variable, and factor and re-level ethnicity variable:

```{r}
# Gender becomes a factor with default level order.
camp$Gender <- factor(camp$Gender)
# Ethnicity becomes a factor whose reference level is "w".
camp$Ethnic <- relevel(factor(camp$Ethnic), ref = "w")
```

Relevel environmental variables (pet, woodstove, and parental smoking) such that 0="no" and 1="yes":

```{r}
# Recode the value 2 to 0 in each exposure column (0 = "no", 1 = "yes").
for (exposure in c("Anypet", "Woodstove", "Parent_smokes")) {
  camp[[exposure]][camp[[exposure]] == 2] <- 0
}
```

Relevel dehumidifier variable:

```{r}
# Recode dehumidifier value 2 to 0; all other values are left as they are.
camp$Dehumid <- replace(camp$Dehumid, which(camp$Dehumid == 2), 0)
```

Convert Dehumidifier variable to factor:

```{r}
# camp$Dehumid <- factor(camp$Dehumid)
```

Convert visit month variable to numeric:

```{r}
# Store the visit month as a number so it can be compared and used as a time axis.
camp[["Visitc"]] <- as.numeric(camp[["Visitc"]])
```

## Creating variables for baseline values

Adding baseline values of FFratio and environmental exposures to the dataset; NA values will be filled in the next step:

```{r}
# Copy each measure into a "B"-prefixed baseline column on the baseline visit
# (Visitc == 0) and leave NA on every other row; those NAs are filled later.
# Vectorised over rows instead of growing each column one element at a time.
is_baseline <- camp$Visitc == 0
for (measure in c("FFratio", "Anypet", "Woodstove", "Dehumid", "Parent_smokes")) {
  camp[[paste0("B", measure)]] <- ifelse(is_baseline, camp[[measure]], NA)
}
```

Filling in baseline values for subsequent data points:

```{r}
# Carry each subject's baseline values down onto that subject's later visits.
# Grouping by `id` prevents a subject without a baseline row from inheriting
# the previous subject's baseline; such a subject keeps NA instead.
camp <- camp %>%
  group_by(id) %>%
  fill(
    all_of(c("BFFratio", "BAnypet", "BWoodstove", "BDehumid", "BParent_smokes")),
    .direction = "down"
  ) %>%
  ungroup()
```

## Wide dataset

Create wide dataset with baseline FFratio variable:

```{r}
# One row per combination of the identifier/baseline columns, with one
# FFratio column per visit month named "m<month>".
campW <- camp %>%
  pivot_wider(
    id_cols = c(
      id, Trtment, Age_rz, Gender, Ethnic,
      BAnypet, BWoodstove, BDehumid, BParent_smokes, BFFratio
    ),
    names_from = Visitc,
    values_from = FFratio,
    names_prefix = "m"
  )
```

## Baseline-Adjusted Long Dataset

```{r}
# Follow-up rows only: the baseline visit (Visitc == 0) is dropped.
camp2 <- dplyr::filter(camp, Visitc != 0)
```

# Descriptive Statistics

```{r}
# Numeric summary of the FEV1/FVC ratio over all rows of `camp`.
summary(camp[["FFratio"]])

# FFratio originally had 31 missing values. Filling the missing FFratio
# values fixed this, so the row-wise deletion below is disabled:
# camp <- na.omit(camp)
```

```{r}
# Row counts per ethnicity level.
table(camp[["Ethnic"]])
```

```{r}
# Row counts per dehumidifier code.
table(camp[["Dehumid"]])
```

# Baseline statistics

Modify campW variables for a better table:

```{r}
# Baseline dehumidifier code 3 is treated as missing in the table.
campW$BDehumid[campW$BDehumid == 3] <- NA

# 1/0 indicator for female gender; any other gender value yields NA.
gender_flag <- c(f = 1, m = 0)
campW$GenderF <- unname(gender_flag[as.character(campW$Gender)])

# Spelled-out ethnicity labels; unlisted codes yield NA.
ethnic_labels <- c(w = "White", b = "Black", h = "Hispanic", o = "Other")
campW$Ethnic2 <- unname(ethnic_labels[as.character(campW$Ethnic)])

campW$Age_rz <- as.numeric(campW$Age_rz)

# Display names for the treatment arms, replaced in place.
arm_names <- c(
  "1nedocromil" = "Nedocromil",
  "0placebo" = "Placebo",
  "2budesonide" = "Budesonide"
)
for (arm_code in names(arm_names)) {
  campW$Trtment[campW$Trtment == arm_code] <- arm_names[[arm_code]]
}
```

Create factored and re-leveled ethnicity variable

```{r}
# Factor the display ethnicity and make "White" the reference level.
campW$Ethnic2 <- relevel(factor(campW$Ethnic2), ref = "White")
```

Try adding a random value to age to get it to display as an average:

```{r}
# Add negligible random noise to age and baseline FFratio so that they are
# displayed as averages in the summary table.
# One draw per row: the previous loop ran over length(campW), which is the
# number of columns, so rows beyond that count received row 1's value.
n_subjects <- nrow(campW)
campW$Age_rz2 <- campW$Age_rz + rnorm(n_subjects) / 1000000
campW$BFFratio2 <- campW$BFFratio + rnorm(n_subjects) / 1000000
```


## Table 1

```{r}
# Table 1: baseline characteristics by treatment arm, with p-values.
table1_data <- campW %>%
  dplyr::select(
    Trtment, BFFratio2, Age_rz2, GenderF, Ethnic2,
    BAnypet, BWoodstove, BDehumid, BParent_smokes
  )

table1_data %>%
  tbl_summary(
    by = Trtment,
    label = list(
      BFFratio2 ~ "FEV1/FVC Ratio",
      Age_rz2 ~ "Age",
      GenderF ~ "Female Gender",
      Ethnic2 ~ "Ethnicity",
      BAnypet ~ "Pet Exposure",
      BWoodstove ~ "Woodstove Exposure",
      BDehumid ~ "Dehumidifier Exposure",
      BParent_smokes ~ "Tobacco Smoke Exposure"
    ),
    statistic = list(
      all_continuous() ~ "{mean}",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = list(
      Age_rz2 ~ 2,
      BFFratio2 ~ 2
    ),
    missing = "no"
  ) %>%
  add_p() %>%
  modify_caption("**Table 1. Baseline Patient Characteristics**") %>%
  bold_labels()
```


## Plots

```{r}
# Bar chart of baseline age. Counts come from the wide dataset (one row per
# subject) so each subject contributes once; counting rows of the long
# follow-up dataset would weight each subject by their number of visits.
baseline_age_counts <- table(campW$Age_rz)
barplot(baseline_age_counts,
        main = "Frequency of Baseline Age",
        ylab = "Frequency",
        xlab = "Age at Baseline")
```


# Time Plot

```{r}
# Mean FFratio and number of rows for every treatment-by-visit cell.
camp_mean <- summarise(
  group_by(camp, Trtment, Visitc),
  Mean_FFratio = mean(FFratio),
  N = n()
)

camp_mean

# Mean response profile per treatment arm over follow-up months.
profile_plot <- ggplot(
  data = camp_mean,
  mapping = aes(
    x = Visitc,
    y = Mean_FFratio,
    colour = factor(Trtment),
    group = factor(Trtment)
  )
) +
  geom_line() +
  geom_point() +
  theme(legend.position = "right") +
  scale_color_manual(
    labels = c("Placebo", "Nedocromil", "Budesonide"),
    values = c("#009E73", "#0072B2", "#CC79A7")
  ) +
  labs(
    title = "Plot of Mean Response Profiles of FFRatio by Treatment",
    x = "Follow-up Visit (months)",
    y = "Mean FFratio (%)",
    colour = "Treatments"
  )
profile_plot
```

Note that at month 44 there are 1, 6, & 3 observations for placebo, nedocromil, and budesonide respectively; and for month 64 there are 8, 2, & 9 observations for placebo, nedocromil, and budesonide respectively.

# Model Building

## Effectiveness of budesonide vs nedocromil

### RIRS Model

$$Y_{ij} = (\beta_0 + b_{1i}) + \beta_1 \mbox{Nedocromil}_{i} + \beta_2 \mbox{Budesonide}_{i} + (\beta_3 + b_{2i} + \beta_{11} \mbox{Nedocromil}_{i} + \beta_{12} \mbox{Budesonide}_{i})t_{ij}  +\varepsilon_{ij}$$

```{r}
# Treatment-by-time model on all visits with a random intercept and a random
# Fyears slope per id, fitted by REML; rows with missing values are dropped.
camp.RIRS.reduced <- lme(
  fixed = FFratio ~ Trtment * Fyears,
  random = ~ 1 + Fyears | id,
  data = camp,
  na.action = na.omit,
  method = "REML"
)
summary(camp.RIRS.reduced)
```

### RI Model

$$Y_{ij} = (\beta_0 + b_{1i}) + \beta_1 \mbox{Nedocromil}_{i} + \beta_2 \mbox{Budesonide}_{i} + (\beta_3 + \beta_{11} \mbox{Nedocromil}_{i} + \beta_{12} \mbox{Budesonide}_{i})t_{ij}  +\varepsilon_{ij}$$

```{r}
# Same fixed effects as the RIRS model, but only a random intercept per id.
camp.RI.reduced <- lme(
  fixed = FFratio ~ Trtment * Fyears,
  random = ~ 1 | id,
  data = camp,
  na.action = na.omit,
  method = "REML"
)
summary(camp.RI.reduced)
```

Comparison to RIRS model:

```{r}
# Compare the random-intercept fit with the random-intercept-and-slope fit.
# Both are REML fits with the same fixed effects and differ only in their
# random-effects structure.
anova(camp.RI.reduced, camp.RIRS.reduced)
```

From Fitzmaurice Table C1, for $q = 1$, a critical value of 14.18 corresponds to p=0.0005; therefore the RIRS model is a better fit.

### Baseline-adjusted RIRS model

$$Y_{ij} = (\beta_0 + b_{1i}) + \beta_1 \mbox{Nedocromil}_{i} + \beta_2 \mbox{Budesonide}_{i} + (\beta_3 + b_{2i} + \beta_{11} \mbox{Nedocromil}_{i} + \beta_{12} \mbox{Budesonide}_{i})t_{ij} + \beta_4 \mbox{Base-FFratio}_i +\varepsilon_{ij}$$

```{r}
# Baseline-adjusted RIRS model on follow-up visits: treatment-by-time plus
# baseline FFratio, with a random intercept and Fyears slope per id (REML).
camp2.RIRS.reduced <- lme(
  fixed = FFratio ~ Trtment * Fyears + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.reduced)
```

### baseline-adjusted RI Model

$$Y_{ij} = (\beta_0 + b_{1i}) + \beta_1 \mbox{Nedocromil}_{i} + \beta_2 \mbox{Budesonide}_{i} + (\beta_3 + \beta_{11} \mbox{Nedocromil}_{i} + \beta_{12} \mbox{Budesonide}_{i})t_{ij} + \beta_4 \mbox{Base-FFratio}_i +\varepsilon_{ij}$$

```{r}
# Baseline-adjusted model with only a random intercept per id (REML).
camp2.RI.reduced <- lme(
  fixed = FFratio ~ Trtment * Fyears + BFFratio,
  random = ~ 1 | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RI.reduced)
```

Comparison to RIRS model:

```{r}
# Baseline-adjusted models: random intercept vs. random intercept and slope.
anova(camp2.RI.reduced, camp2.RIRS.reduced)
# Unadjusted models, repeated here so both tables appear together.
anova(camp.RI.reduced, camp.RIRS.reduced)
```

From Fitzmaurice Table C1, for $q = 1$, a critical value of 14.18 corresponds to p=0.0005; therefore the baseline-adjusted RIRS model is a better fit.  The baseline-adjusted and unadjusted models cannot be directly compared; however, the baseline-adjusted models have lower AIC & BIC values and greater biological plausibility (especially for budesonide).

### Change over time in FEV1/FVC ratio

#### Placebo

Starting with the baseline-adjusted RIRS model; we can test $H_0: \beta_3 = 0$:

```{r}
# Contrast selecting the 4th fixed effect; for Trtment*Fyears + BFFratio the
# expected order is intercept, two treatment terms, Fyears, BFFratio, then
# the two treatment-by-Fyears interactions.
B3 <- c(0, 0, 0, 1, 0, 0, 0)
anova(camp2.RIRS.reduced, L = B3)
```

Therefore we can conclude that there is a change over time in FEV1/FVC ratio for the placebo group.

#### Nedocromil

Starting with the baseline-adjusted RIRS model; we can test $H_0: \beta_3 + \beta_{11}= 0$:

```{r}
# Contrast summing the 4th and 6th fixed effects (the Fyears slope plus the
# first treatment-by-Fyears interaction).
B3B11 <- c(0, 0, 0, 1, 0, 1, 0)
anova(camp2.RIRS.reduced, L = B3B11)
```

Therefore we fail to reject the null hypothesis; the change over time in FEV1/FVC ratio for the nedocromil group is not statistically significantly different from the null.

#### Budesonide

Starting with the baseline-adjusted RIRS model; we can test $H_0: \beta_3 + \beta_{12} = 0$:

```{r}
# Contrast summing the 4th and 7th fixed effects (the Fyears slope plus the
# second treatment-by-Fyears interaction).
B3B12 <- c(0, 0, 0, 1, 0, 0, 1)
anova(camp2.RIRS.reduced, L = B3B12)
```

Therefore we can conclude that there is a change over time in FEV1/FVC ratio for the budesonide group.

### Effectiveness of budesonide and nedocromil in preventing decline of FFratio

We can first start with the null hypothesis that neither treatment is effective; $H_0: \beta_{11} = \beta_{12} = 0$:

```{r}
# One contrast per treatment-by-Fyears interaction (6th and 7th fixed
# effects), tested jointly by stacking them as rows.
B11 <- c(0, 0, 0, 0, 0, 1, 0)
B12 <- c(0, 0, 0, 0, 0, 0, 1)
anova(camp2.RIRS.reduced, L = rbind(B11, B12))
```

We can now test each individually:

```{r}
# Single-contrast tests of each interaction coefficient, using the B11 and
# B12 contrast vectors defined in the preceding chunk.
anova(camp2.RIRS.reduced, L = B11)
anova(camp2.RIRS.reduced, L = B12)
```

Therefore nedocromil does not significantly prevent decline of FFratio, but budesonide does have a statistically significant effect on decline in FFratio

### Difference in effectiveness of budesonide vs nedocromil

To test differences in effectiveness of budesonide vs nedocromil we can test differences in immediate effect and effect over time; to test if there is a difference in immediate effect we will test $H_0: \beta_1 - \beta_2 = 0$:

```{r}
# Contrast for the difference between the 2nd and 3rd fixed effects (the two
# treatment main effects).
B1B2 <- c(0, 1, -1, 0, 0, 0, 0)
anova(camp2.RIRS.reduced, L = B1B2)
```

We find a significant difference in immediate effect.  To test effect over time, we can test $H_0: \beta_{11} - \beta_{12} = 0$:

```{r}
# Contrast for the difference between the 6th and 7th fixed effects (the two
# treatment-by-Fyears interactions).
B11B12 <- c(0, 0, 0, 0, 0, 1, -1)
anova(camp2.RIRS.reduced, L = B11B12)
```

We also find a significant difference in effect over time.

### Parameter interpretation of baseline-adjusted RIRS model:

```{r}
# Coefficient table and variance components of the baseline-adjusted RIRS model.
summary(camp2.RIRS.reduced)
# Confidence intervals for the model parameters.
# NOTE(review): `method = "Wald"` does not look like a documented argument of
# nlme's intervals() for lme fits and is presumably ignored - confirm.
intervals(camp2.RIRS.reduced, method = "Wald")
```

Intercept 95% CI

```{r}
# Estimate +/- 1.96 standard deviations for the subject-specific intercept.
# NOTE(review): EST and SD are hard-coded, presumably transcribed from the
# summary of camp2.RIRS.reduced - verify after any refit.
EST <- 20.57703940
SD <- 4.1355942
half_width <- 1.96 * SD
(UB <- EST + half_width)
(LB <- EST - half_width)
```

Therefore 95% of subjects will have an intercept ($\beta_0 + b_{1i}$) within (12.47,28.68).

Slope 95% CI

```{r}
# Estimate +/- 1.96 standard deviations for the subject-specific slope.
# NOTE(review): EST and SD are hard-coded, presumably transcribed from the
# summary of camp2.RIRS.reduced - verify after any refit.
EST <- -0.16045427
SD <- 0.7290081
half_width <- 1.96 * SD
(UB <- EST + half_width)
(LB <- EST - half_width)
```

Therefore 95% of subjects will have a slope ($\beta_3 + b_{2i}$) within (-1.59,1.27).

## Patient characteristics-adjusted associations with FFratio

$$Y_{ij} = (\beta_0 + b_{1i}) + \beta_1 \mbox{Nedocromil}_{i} + \beta_2 \mbox{Budesonide}_{i} + (\beta_3 + b_{2i} + \beta_{11} \mbox{Nedocromil}_{i} + \beta_{12} \mbox{Budesonide}_{i})t_{ij} +\beta_4 \mbox{Gender}_i +\beta_5 \mbox{Ethnicity}_i +\beta_6 \mbox{Pet}_i +\beta_7 \mbox{Woodstove}_i +\beta_8 \mbox{Dehumidifier}_i + \beta_9 \mbox{Base-FFratio}_i  +\varepsilon_{ij}$$

$$\begin{align*}
Y_{ij} &= (\beta_0 + b_{1i}) + \beta_1 \mbox{Nedocromil}_{i} + \beta_2 \mbox{Budesonide}_{i}\\
&\quad+ (\beta_3 + b_{2i} + \beta_{11} \mbox{Nedocromil}_{i} + \beta_{12} \mbox{Budesonide}_{i})t_{ij}\\
&\quad+\beta_4 \mbox{Gender}_i +\beta_5 \mbox{Ethnicity}_i + \beta_6 \mbox{Base-Age}_i\\
&\quad+ \beta_7 \mbox{Smoke}_{ij} + \beta_8 \mbox{Pet}_{ij} +\beta_9 \mbox{Woodstove}_{ij} +\beta_{10} \mbox{Dehumidifier}_{ij} + \beta_{13} \mbox{Base-FEV1/FVC}_{i}  +\varepsilon_{ij}
\end{align*}$$

```{r}
# Full model: treatment-by-time plus age, gender, ethnicity, the four
# exposure variables and baseline FFratio; random intercept and Fyears slope
# per id, fitted by REML.
camp2.RIRS.full <- lme(
  fixed = FFratio ~ Trtment * Fyears + Age_rz + Gender + Ethnic +
    Anypet + Woodstove + Dehumid + Parent_smokes + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.full)
```

It is interesting to note that treatment effects seem to have similar values in the full model.

### Deletion 1

Given the above results, we can consider removing age from the above model:

```{r}
# Deletion step 1: the full model without Age_rz.
camp2.RIRS.1 <- lme(
  fixed = FFratio ~ Trtment * Fyears + Gender + Ethnic +
    Anypet + Woodstove + Dehumid + Parent_smokes + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.1)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.full.ml <- update(camp2.RIRS.full, method = "ML")
camp2.RIRS.1.ml <- update(camp2.RIRS.1, method = "ML")
anova(camp2.RIRS.full.ml, camp2.RIRS.1.ml)
```

The models have similar fit; therefore we can proceed with the next deletion step.

### Deletion 2

Given the above results, we can consider removing gender from the above model:

```{r}
# Deletion step 2: model 1 without Gender.
camp2.RIRS.2 <- lme(
  fixed = FFratio ~ Trtment * Fyears + Ethnic +
    Anypet + Woodstove + Dehumid + Parent_smokes + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.2)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.1.ml <- update(camp2.RIRS.1, method = "ML")
camp2.RIRS.2.ml <- update(camp2.RIRS.2, method = "ML")
anova(camp2.RIRS.1.ml, camp2.RIRS.2.ml)
```

The models have similar fit; therefore we can proceed with the next deletion step.

### Deletion 3

Given the above results, we can consider removing dehumidifier exposure from the above model:

```{r}
# Deletion step 3: model 2 without Dehumid.
camp2.RIRS.3 <- lme(
  fixed = FFratio ~ Trtment * Fyears + Ethnic +
    Anypet + Woodstove + Parent_smokes + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.3)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.2.ml <- update(camp2.RIRS.2, method = "ML")
camp2.RIRS.3.ml <- update(camp2.RIRS.3, method = "ML")
anova(camp2.RIRS.2.ml, camp2.RIRS.3.ml)
```

The models have similar fit; therefore we can proceed with the next deletion step.

### Deletion 4

Given the above results, we can consider removing secondhand smoke exposure from the above model:

```{r}
# Deletion step 4: model 3 without Parent_smokes.
camp2.RIRS.4 <- lme(
  fixed = FFratio ~ Trtment * Fyears + Ethnic + Anypet + Woodstove + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.4)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.3.ml <- update(camp2.RIRS.3, method = "ML")
camp2.RIRS.4.ml <- update(camp2.RIRS.4, method = "ML")
anova(camp2.RIRS.3.ml, camp2.RIRS.4.ml)
```

The models have similar fit; therefore we can proceed with the next deletion step.

### Deletion 5

Given the above results, we can consider removing woodstove exposure from the above model:

```{r}
# Deletion step 5: model 4 without Woodstove.
camp2.RIRS.5 <- lme(
  fixed = FFratio ~ Trtment * Fyears + Ethnic + Anypet + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.5)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.4.ml <- update(camp2.RIRS.4, method = "ML")
camp2.RIRS.5.ml <- update(camp2.RIRS.5, method = "ML")
anova(camp2.RIRS.4.ml, camp2.RIRS.5.ml)
```

The models have similar fit; therefore we can proceed with the next deletion step.

### Deletion 6

Given the above results, we can consider removing ethnicity from the above model:

```{r}
# Deletion step 6: model 5 without Ethnic.
camp2.RIRS.6 <- lme(
  fixed = FFratio ~ Trtment * Fyears + Anypet + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.6)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.5.ml <- update(camp2.RIRS.5, method = "ML")
camp2.RIRS.6.ml <- update(camp2.RIRS.6, method = "ML")
anova(camp2.RIRS.5.ml, camp2.RIRS.6.ml)
```

The models have similar fit; therefore we can proceed with the next deletion step.

### Deletion 7

Given the above results, we can consider removing pet exposure from the above model:

```{r}
# Deletion step 7: model 6 without Anypet.
camp2.RIRS.7 <- lme(
  fixed = FFratio ~ Trtment * Fyears + BFFratio,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.7)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.6.ml <- update(camp2.RIRS.6, method = "ML")
camp2.RIRS.7.ml <- update(camp2.RIRS.7, method = "ML")
anova(camp2.RIRS.6.ml, camp2.RIRS.7.ml)
```

The models have similar fit; therefore we can proceed with the next deletion step.

### Deletion 8

Given the above results, we can consider removing baseline FFratio from the above model:

```{r}
# Deletion step 8: model 7 without BFFratio.
camp2.RIRS.8 <- lme(
  fixed = FFratio ~ Trtment * Fyears,
  random = ~ 1 + Fyears | id,
  data = camp2,
  na.action = na.omit,
  method = "REML"
)
summary(camp2.RIRS.8)
```

We can compare the fit of the reduced model

```{r}
# The two models differ in their fixed effects, and likelihood-ratio
# comparisons of REML fits are not meaningful in that case. Both are refitted
# by maximum likelihood for the test; the REML fits are left untouched.
camp2.RIRS.7.ml <- update(camp2.RIRS.7, method = "ML")
camp2.RIRS.8.ml <- update(camp2.RIRS.8, method = "ML")
anova(camp2.RIRS.7.ml, camp2.RIRS.8.ml)
```

The models do not have similar fit; therefore model 7 is the parsimonious model.

## Marginal Models

### i. Unstructured covariance

Then create a 'time' variable so that corSymm doesn't complain about 'objects must be a sequence of consecutive integers':

```{r}
# Map each listed visit month to its position (1-19) in the visit schedule so
# that `time` is a run of consecutive integers; a month that is not listed
# keeps its original Visitc value.
visit_months <- c(2, 4, 12, 16, 24, 28, 36, 40, 44, 48,
                  52, 56, 60, 64, 72, 84, 96, 108, 120)
visit_rank <- match(camp2$Visitc, visit_months)
camp2$time <- ifelse(is.na(visit_rank), camp2$Visitc, as.numeric(visit_rank))
```

Creating the model:

```{r}
# camp2.reml.uns <- gls(FFratio ~ Trtment*Fyears + Age_rz + Gender + Ethnic + Anypet + Woodstove + Dehumid + Parent_smokes + BFFratio,
#                  data = camp2,
#                  corr = corSymm(form = ~ time | id),       # it does not run if use form =  ~ age | ID
#                  weights = varIdent(form = ~ 1 | Fyears),
#                  method = "REML"
#                      )
```

Summary:

```{r}
# summary(camp2.reml.uns)
# anova(camp2.reml.uns)
```

## Model Tables

### Create dataset for display

Create a display-friendly dataset from the camp2 dataset:

```{r}
# Work on a copy so the display-only columns added below do not alter camp2.
camp3 <- camp2
```

Create display-friendly gender variable:

```{r}
# 1/0 indicator for female gender; any other gender value yields NA.
gender_flag <- c(f = 1, m = 0)
camp3$GenderF <- unname(gender_flag[as.character(camp3$Gender)])
```

Update ethnicity variable for display:

```{r}
# Spelled-out ethnicity labels (unlisted codes yield NA), stored as a factor
# whose reference level is "White".
ethnic_labels <- c(w = "White", b = "Black", h = "Hispanic", o = "Other")
camp3$Ethnic2 <- unname(ethnic_labels[as.character(camp3$Ethnic)])
camp3$Ethnic2 <- relevel(factor(camp3$Ethnic2), ref = "White")
```

Update treatment variable for display:

```{r}
# Display names for the treatment arms (unlisted codes yield NA), stored as a
# factor whose reference level is "Placebo".
arm_names <- c(
  "0placebo" = "Placebo",
  "1nedocromil" = "Nedocromil",
  "2budesonide" = "Budesonide"
)
camp3$Trtment2 <- unname(arm_names[as.character(camp3$Trtment)])
camp3$Trtment2 <- relevel(factor(camp3$Trtment2), ref = "Placebo")

```


### Create T2 model with lme4:

```{r}
# lme4 fit of the full model on the display dataset: random intercept and
# Fyears slope per id, REML estimation, rows with missing values dropped.
camp3.RIRS.full <- lmer(
  formula = FFratio ~ Trtment2 * Fyears + BFFratio + Age_rz + GenderF +
    Ethnic2 + Anypet + Woodstove + Dehumid + Parent_smokes +
    (1 + Fyears | id),
  data = camp3,
  na.action = na.omit,
  REML = TRUE
)
summary(camp3.RIRS.full)
```

### T2

```{r}
# Table 2: regression table for the full lme4 model with global p-values.
# (A custom tidier was tried and left disabled: tidy_fun = broom.mixed::tidy)
T2_labels <- list(
  BFFratio ~ "Baseline FEV1/FVC Ratio",
  Trtment2 ~ "Treatment",
  Fyears ~ "Time (Years)",
  Age_rz ~ "Baseline Age",
  Ethnic2 ~ "Ethnicity",
  GenderF ~ "Female Gender",
  Anypet ~ "Pet Exposure",
  Woodstove ~ "Woodstove Exposure",
  Dehumid ~ "Dehumidifier Exposure",
  Parent_smokes ~ "Smoke Exposure"
)
T2 <- tbl_regression(camp3.RIRS.full, label = T2_labels) %>%
  add_global_p() %>%
  modify_caption("**Table 2. Full Model Parameters**") %>%
  bold_labels()
T2
```

### Create lme4 Parsimonious baseline-adjusted RIRS model

```{r}
# lme4 fit of the parsimonious baseline-adjusted model: treatment-by-time
# plus baseline FFratio, random intercept and Fyears slope per id (REML).
camp3.RIRS.3 <- lmer(
  formula = FFratio ~ Trtment2 * Fyears + BFFratio + (1 + Fyears | id),
  data = camp3,
  na.action = na.omit,
  REML = TRUE
)
summary(camp3.RIRS.3)
```

### T3

```{r}
# Table 3: regression table for the parsimonious model with global p-values.
# (A custom tidier was tried and left disabled: tidy_fun = broom.mixed::tidy)
T3_labels <- list(
  BFFratio ~ "Baseline FEV1/FVC Ratio",
  Trtment2 ~ "Treatment",
  Fyears ~ "Time (Years)"
)
T3 <- tbl_regression(camp3.RIRS.3, label = T3_labels) %>%
  add_global_p() %>%
  modify_caption("**Table 3. Parsimonious Model Parameters**") %>%
  bold_labels()
T3
```

# Model Diagnostics

## Scatter plot matrix

```{r cache = TRUE}
# Scatter plot matrix of columns 11 to 15 of the wide dataset.
# NOTE(review): positional selection - presumably the first five per-visit
# FFratio columns that follow the ten id columns; confirm the column order.
pairs(campW[11:15])
```

## Based on Marginal and Conditional Residuals

```{r cache = TRUE}
# Population-level (level 0) predictions from the baseline-adjusted RIRS fit,
# and the marginal residuals: observed FFratio minus those predictions.
camp2$pred_m <- predict(camp2.RIRS.reduced, level = 0)
camp2$resid_m <- camp2$FFratio - camp2$pred_m

# Marginal residuals against the predicted marginal mean, with a zero line.
ggplot(camp2, aes(x = pred_m, y = resid_m)) +
  geom_point() +
  geom_hline(yintercept = 0)
```

## QQplot at each time point

```{r}
# Normal Q-Q plot of the marginal residuals at each selected visit month.
for (visit_month in c(2, 4, 12, 16)) {
  qqnorm(camp2$resid_m[camp2$Visitc == visit_month])
}
```

Checked the normality assumption for selected timepoints.

## Transformed Residuals

```{r cache = TRUE}
# Estimated covariance matrix of the response implied by the fitted model
# (extract.lme.cov comes from mgcv).
est.cov <- extract.lme.cov(camp2.RIRS.reduced)
# chol() returns the upper-triangular factor; transposing gives the lower one.
li <- t(chol(est.cov))
# Transform the marginal residuals by the inverse of the lower factor.
# The result is a one-column matrix because of the matrix product.
camp2$cholesky.residuals <- solve(li) %*% camp2$resid_m

## QQ plot of Transformed Residuals
qqnorm(camp2$cholesky.residuals)
```

```{r}
# Show the first 20 transformed residuals as a quick check.
camp2$cholesky.residuals[1:20]  
```

```{r cache = TRUE}
# Apply the same transformation (inverse of `li`) to the marginal predictions.
camp2$cholesky.pred <- solve(li) %*% camp2$pred_m
# Show the first 20 transformed predictions as a quick check.
camp2$cholesky.pred[1:20]
```

```{r}
## Transformed residuals against transformed predicted values, with a zero line
ggplot(
  mapping = aes(
    x = camp2$cholesky.pred,
    y = camp2$cholesky.residuals
  )
) +
  geom_point() +
  geom_hline(yintercept = 0)
```

```{r cache = TRUE}
# Mean absolute transformed residual; printed and then used as the height of
# the reference line, so the line always matches the current fit instead of
# a value copied from an earlier run.
mean_abs_resid <- mean(abs(camp2$cholesky.residuals))
mean_abs_resid
## Absolute transformed residuals vs. transformed predicted value
ggplot(mapping = aes(x = camp2$cholesky.pred, y = abs(camp2$cholesky.residuals))) +
  geom_point() +
  geom_hline(yintercept = mean_abs_resid) +
  geom_smooth(method = "loess")
```


## Identify outlying subjects using Mahalanobis distance

```{r cache = TRUE}
# Squared transformed residuals, summed per subject together with the number
# of rows that subject contributes.
camp2$cholesky.residuals.sq <- camp2$cholesky.residuals^2
Mdistance <- summarise(
  group_by(camp2, id),
  sum = sum(cholesky.residuals.sq),
  n = n()
)

# Upper-tail chi-square probability of each subject's sum, with degrees of
# freedom equal to that subject's row count.
Mdistance$pvalue <- with(
  Mdistance,
  pchisq(sum, df = n, ncp = 0, lower.tail = FALSE)
)
# Ids of the subjects whose p-value falls below 0.0001.
with(Mdistance, id[pvalue < 0.0001])
```

Refit the model after excluding these subjects:

```{r}
# Subject ids to exclude from the sensitivity refit.
iexclude <- c(2, 86, 100, 197, 285, 321, 366, 373, 384, 401, 404, 441, 499,
              531, 553, 557, 561, 587, 639, 659, 686, 722, 744, 764, 771, 788,
              790, 796, 809, 819, 879, 902, 910, 1005, 1019, 1024, 1038)
# "Not in" operator: the negation of %in%.
"%ni%" <- Negate("%in%")

# The list above is hard-coded. If the per-subject table from the outlier
# step is available, warn when the list no longer matches the ids flagged there.
if (exists("Mdistance")) {
  flagged_ids <- Mdistance$id[Mdistance$pvalue < 0.0001]
  if (!setequal(iexclude, flagged_ids)) {
    warning("iexclude differs from the ids currently flagged in Mdistance",
            call. = FALSE)
  }
}

# Keep only the rows whose subject is not on the exclusion list.
camp4 <- filter(camp2, id %ni% iexclude)

# Refit the model
camp4.RIRS.reduced <- lme(FFratio ~ Trtment*Fyears + BFFratio,
                   data = camp4,
                   random = ~1+Fyears|id,
                   method = "REML",
                   na.action = na.omit
                   )
summary(camp4.RIRS.reduced)
```

There are 37 outlying subjects. After exclusion, the model coefficients and conclusions are similar to the full sample.