-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
reintegrated measure definitions and broke out sections into seperate…
… files
- Loading branch information
Showing
11 changed files
with
1,147 additions
and
595 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,255 @@ | ||
#' Code to calculate the modern ICC version (using linear mixed-effects model). | ||
#' A lot is borrowed from AFNI's 3dICC_REML.R. Big shout out to the AFNI folks. | ||
|
||
#' # Setup | ||
#install.packages(c('lme4','plyr','reshape2')) | ||
library(lme4) | ||
library(plyr) | ||
library(reshape2) | ||
#qc_anat <- read.csv("abide_anat.csv")[,-1] | ||
#qc_func <- read.csv("abide_func.csv")[,-1] | ||
qc_anat <- read.csv("corr_anat.csv")[,-1] | ||
qc_func <- read.csv("corr_func.csv")[,-1] | ||
|
||
|
||
#' # Anatomical | ||
#' | ||
#' Examine the reliability for the anatomical measures. | ||
#' | ||
#' ## Filter Data | ||
#' | ||
#' We only want subjects with multi-session data (i.e., more than one session of data). | ||
#+ anat-filter1 | ||
qc_anat_trt <- ddply(qc_anat, .(subject), function(x) { | ||
if (nrow(x) == 1) { | ||
return(data.frame()) | ||
} else { | ||
return(x) | ||
} | ||
}) | ||
qc_anat_trt$site <- as.factor(as.character(qc_anat_trt$site)) | ||
|
||
#' Now we check the number of subject per session | ||
#' and for each site, we only keep those with 10 subjects per session. | ||
#+ anat-filter2 | ||
table(qc_anat_trt$site, qc_anat_trt$session) | ||
qc_anat_trt <- ddply(qc_anat_trt, .(site, session), function(x) { | ||
if (nrow(x) < 10) { | ||
return(data.frame()) | ||
} else { | ||
return(x) | ||
} | ||
}) | ||
qc_anat_trt$site <- as.factor(as.character(qc_anat_trt$site)) | ||
|
||
#' For each site, we make the number of subjects per session the same. | ||
#+ anat-filter3 | ||
table(qc_anat_trt$site, qc_anat_trt$session) | ||
qc_anat_trt <- ddply(qc_anat_trt, .(site), function(x) { | ||
# Find the session with the minimum number of subjects | ||
nsubs_per_sess <- tapply(x$subject, x$session, length) | ||
sess_min_subs <- as.numeric(names(nsubs_per_sess)[which.min(nsubs_per_sess)]) | ||
# Only use those subjects | ||
subs_to_use <- x$subject[x$session == sess_min_subs] | ||
x_to_use <- subset(x, subject %in% subs_to_use) | ||
x_to_use | ||
}) | ||
|
||
#' Finally, a few subjects have more than one scan per site. Keep the first one. | ||
#' There aren't enough to look at within-session reliability. | ||
#+ anat-filter4 | ||
table(qc_anat_trt$site, qc_anat_trt$session) | ||
qc_anat_trt <- ddply(qc_anat_trt, .(site, subject, session), function(x) { | ||
x[1,] | ||
}) | ||
|
||
#' Now we are done, let's see what we have left | ||
#+ anat-filter5 | ||
table(qc_anat_trt$site, qc_anat_trt$session) | ||
|
||
#' ## Reshape | ||
#' | ||
#' We want to do the computations for each site and each measure. So we will need to reshape the matrix. | ||
#+ anat-reshape | ||
qc_anat_trt2 <- melt(qc_anat_trt, id=c("subject", "session","series", "site"), variable.name="measure") | ||
|
||
|
||
#' ## ICC | ||
#' | ||
#' For each measure and site, we calculate the ICC. | ||
#' We do this with our own code so we can ensure non-negative values amongst other things. | ||
#' | ||
#+ calc-icc | ||
calc_icc <- function(x) { | ||
# ICC Juicy Core | ||
nFact <- 2 | ||
myStat<-vector(mode="numeric", length=nFact+1) | ||
fmAOV <- lmer(value ~ 1 + (1|subject) + (1|session), x) | ||
for(i in 1:nFact) myStat[i] <- VarCorr(fmAOV)[[i]][1] # factor variances | ||
myStat[nFact+1] <- attr(VarCorr(fmAOV), "sc")^2 # residual variance | ||
myStat <- myStat/sum(myStat) | ||
icc <- myStat[1] | ||
return(icc) | ||
} | ||
qc_anat_icc <- ddply(qc_anat_trt2, .(measure, site), function(x) { | ||
# matrix: subject x session | ||
x$subject <- as.factor(x$subject) | ||
res <- calc_icc(x) | ||
nsubjects <- length(unique(x$subject)) | ||
nsessions <- length(unique(x$session)) | ||
c(subjects=nsubjects, sessions=nsessions, icc=res) | ||
}) | ||
|
||
|
||
|
||
#' # Functional | ||
#' | ||
#' Examine the reliability for the functional measures. | ||
#' | ||
#' ## Filter Data | ||
#' | ||
#' We only want subjects with multi-session/scan data. Since functional data has | ||
#' many scans within a session, we will make that clean and only look at within session | ||
#' data for the 1st scan. | ||
#+ func-filter1 | ||
names(qc_func)[1:4]=c('subject','session','scan','site') | ||
qc_func_trt <- ddply(qc_func, .(subject), function(x) { | ||
if (nrow(x) == 1) { | ||
return(data.frame()) | ||
} else { | ||
return(x) | ||
} | ||
}) | ||
qc_func_trt$site <- as.factor(as.character(qc_func_trt$site)) | ||
|
||
# There seems to be some duplicates, remove them | ||
qc_func_trt <- ddply(qc_func_trt, .(subject, session, scan), function(x) x[1,]) | ||
|
||
#' Now we check the number of subject per session | ||
#' and for each site, we only keep those with 10 subjects per session. | ||
#+ func-filter2 | ||
table(qc_func_trt$site, qc_func_trt$session) | ||
qc_func_trt <- ddply(qc_func_trt, .(site, session), function(x) { | ||
if (nrow(subset(x, scan == 1)) < 10) { | ||
return(data.frame()) | ||
} else { | ||
return(x) | ||
} | ||
}) | ||
qc_func_trt$site <- as.factor(as.character(qc_func_trt$site)) | ||
|
||
#comp <- ddply(qc_func_trt, .(site), function(x) { | ||
# table(x$session, x$scan) | ||
#}) | ||
#comp[is.na(comp)] <- 0 | ||
#comp | ||
table(qc_func_trt$site, qc_func_trt$scan) | ||
|
||
#' Here we get the between session data. | ||
#' For each site, we make the number of subjects per session the same. | ||
#' Finally, a few subjects have more than one scan per site. Keep the first one. | ||
#+ func-filter3 | ||
table(qc_func_trt$site, qc_func_trt$session) | ||
qc_func_trta <- ddply(qc_func_trt, .(site), function(x) { | ||
# Find the session with the minimum number of subjects | ||
nsubs_per_sess <- tapply(x[x$scan == 1,]$subject, x[x$scan == 1,]$session, length) | ||
sess_min_subs <- as.numeric(names(nsubs_per_sess)[which.min(nsubs_per_sess)]) | ||
# Only use those subjects | ||
subs_to_use <- unique(x$subject[x$session == sess_min_subs]) | ||
x_to_use <- subset(x, subject %in% subs_to_use) | ||
x_to_use | ||
}) | ||
qc_func_trta$site <- as.factor(as.character(qc_func_trta$site)) | ||
qc_func_trta <- ddply(qc_func_trta, .(site, subject, session), function(x) { | ||
x[1,] | ||
}) | ||
qc_func_trta <- ddply(qc_func_trta, .(site), function(x) { | ||
if (length(unique(x$session)) == 1) { | ||
return(data.frame()) | ||
} else { | ||
return(x) | ||
} | ||
}) | ||
qc_func_trta$site <- as.factor(as.character(qc_func_trta$site)) | ||
|
||
#' Here we get the within session data. We will only look at the first session | ||
#' and keep all the possibly scans for each subject. | ||
#+ func-filter4 | ||
table(qc_func_trt$site, qc_func_trt$session) | ||
qc_func_trtb <- ddply(subset(qc_func_trt, session == 1), .(site), function(x) { | ||
# Find the scan with the minimum number of subjects | ||
nsubs_per_scan <- tapply(x$subject, x$scan, length) | ||
scan_min_subs <- as.numeric(names(nsubs_per_scan)[which.min(nsubs_per_scan)]) | ||
# Only use that many scan | ||
subs_to_use <- unique(x$subject[x$scan == scan_min_subs]) | ||
x_to_use <- subset(x, subject %in% subs_to_use) | ||
x_to_use | ||
}) | ||
qc_func_trtb <- ddply(qc_func_trtb, .(site), function(x) { | ||
if (length(unique(x$scan)) == 1) { | ||
return(data.frame()) | ||
} else { | ||
return(x) | ||
} | ||
}) | ||
qc_func_trtb$site <- as.factor(as.character(qc_func_trtb$site)) | ||
|
||
#' Now we are done, let's see what we have left | ||
#+ anat-filter5 | ||
table(qc_func_trta$site, qc_func_trta$session) | ||
table(qc_func_trtb$site, qc_func_trtb$scan) | ||
|
||
|
||
## shlee: add perturbation | ||
original = qc_func_trta | ||
qc_func_trta = original | ||
|
||
qc_func_trta1=qc_func_trta | ||
qc_func_trta1 <- ddply(qc_func_trta, .(subject), function(x) { | ||
y1 = x$num_fd; y1 = y1 + rnorm(1,0,1); x$num_fd =y1 | ||
y2 = x$perc_fd; y2 = y2 + rnorm(1,0,1); x$perc_fd=y2 | ||
return(x) | ||
}) | ||
|
||
hist(qc_func_trta1$perc_fd) | ||
|
||
|
||
|
||
#' ## Reshape | ||
#' | ||
#' We want to do the computations for each site and each measure. So we will need to reshape the matrix. | ||
#+ anat-reshape | ||
qc_func_trt_btw <- melt(qc_func_trta, id=c("subject", "session", "scan", "site"), variable.name="measure") | ||
|
||
qc_func_trt_win <- melt(qc_func_trtb, id=c("subject", "session", "scan", "site"), variable.name="measure") | ||
|
||
|
||
|
||
#' ## ICC | ||
#' | ||
#' For each measure and site, we calculate the ICC. | ||
#+ calc-icc | ||
calc_icc <- function(x) { | ||
# ICC Juicy Core | ||
nFact <- 2 | ||
myStat<-vector(mode="numeric", length=nFact+1) | ||
fmAOV <- lmer(value ~ 1 + (1|subject) + (1|session), x) | ||
for(i in 1:nFact) myStat[i] <- VarCorr(fmAOV)[[i]][1] # factor variances | ||
myStat[nFact+1] <- attr(VarCorr(fmAOV), "sc")^2 # residual variance | ||
myStat <- myStat/sum(myStat) | ||
icc <- myStat[1] | ||
return(icc) | ||
} | ||
qc_func_icc_btw <- ddply(qc_func_trt_btw, .(measure, site), function(x) { | ||
# matrix: subject x session | ||
x$subject <- as.factor(x$subject) | ||
res <- calc_icc(x) | ||
nsubjects <- length(unique(x$subject)) | ||
nsessions <- length(unique(x$session)) | ||
c(subjects=nsubjects, sessions=nsessions, icc=res) | ||
}) | ||
|
||
|
||
#' # Save | ||
write.csv(qc_anat_icc, file="corr_qc_anat_icc_btw_sess.csv") | ||
write.csv(qc_func_icc_btw, file="corr_qc_func_icc_btw_sess.csv") |
Oops, something went wrong.