-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtutorial_preprocEDF.R
341 lines (291 loc) · 16.4 KB
/
tutorial_preprocEDF.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
## This file serves as a template for preprocessing Eyelink eyetracking data in
## R.Only works on Linux. Check dependencies of edfR online.
## author: Wanja Moessing, November, 2018, [email protected]
# The basic idea is that you ran an experiment in python or matlab and during
# each trial, you sent messages (e.g., with EyelinkSendTabMsg from my
# repository). Those messages should always mark interesting timepoints, just
# like TTL triggers in EEG recordings do. In addition you might have sent one
# or more, messages per trial, that included the behavioral data of that trial.
# The goal of this script is to parse the edf file, extract the behavioral data
# , downsample the eyetracking data, align behavioral and eyetracking data in a
# long table format, interpolate blinks, cut out useless time intervals (like
# between two trials), and finally store all subjects data in one huge file. In
# a last step, Eyetracking data are "downsampled" to one datum per trial, to get
# an additional copy with behavioral data only.
################################################################################
######################## STEP 1: necessary packages ############################
################################################################################
## We need a couple of packages. Some are not available on CRAN, but only from
## Github. Some of those (like mine) are simply not well enough documented and
## maintained to be made *that* public. Others, like edfr, depend on third-party
## software (SR's devpack). To install from github, we need the install_github()
## function from the devtools-package.
# install.packages('devtools','doParallel','data.table','ggplot2','lme4','dplyr','afex')
# devtools::install_github('hedderik/pR')
# devtools::install_github('wanjam/wm_utilities/helper-funs/wmR')
# devtools::install_github('hedderik/hvR')
# devtools::install_github('jashubbard/edfR')
# devtools::install_github("jwdink/eyetrackingR") #check http://www.eyetracking-r.com for elaborate tutorial
#Differences EDF2R/edfR:
## WHILE EDF2R works on all OS, edfR does not work on windows. However, it
## imports more stuff (eg saccades etc).
## edf2R only imports the frst onset of a recordingblock, edfR all (extract first to get meaningful info)
## edf2R sets missing gaze info (e.g., during blinks) to NA. edfR to 1e+08 (=100000000).
## edf2R samples are a few rows longer because edfR deletes nonsense info at end automatically
## rownames are a bit different but generally quite intuitive in both packages.
## edf2R only reads ttl triggers in wanja's modified version. edfR can simulate
## this via edf.events(file,type="INPUTEVENT",fields = c("input","sttime"))
################################################################################
######################## STEP 2: Preparation & Parameters ######################
################################################################################
## load libraries
pkgs <- c('data.table', 'hvR', 'pR', 'doParallel', 'edfR', 'stringr')
sapply(pkgs, require, character.only = TRUE)
## Define baselineperiod & Hz for downsampling
# Below, you can define to what the time used by the baseline will be erelative.
# (e.g., trial onset). So the numbers here always depend on whatever you write
# below. Look for function 'normalize' ~ line 299.
baselineperiod <- c(-100, 0)
outHz <- 100
## Extracting behavioral data is always the same for all subjects. That is, this
## task is "embarassingly parallel". You can activate these lines to run things
## in parallel, once you have many subjects to process.
## Start workers (i.e., R-Sessions) on other cores & load packages in these sessions.
cl <- makeCluster(2, outfile = "")
registerDoParallel(cl)
## Specify in- and output
EDFpath <- "EDF/" # adjust this to the path where your .edf files are stored
RDATpath <- "EDF/Rdat" # output directory for temporary parsed files
OUTpath <- "Eyedata/" # output directory
EDFpattern <- "TE_\\d+.[edf|EDF]" # pattern used to match your filenames (see https://github.com/rstudio/cheatsheets/raw/master/strings.pdf)
RDATpattern <- "TE_\\d+.Rdat"
OUTprefix <- "TutorialExperiment"
# create output folders
if (!dir.exists(RDATpath)) {dir.create(RDATpath)}
if (!dir.exists(OUTpath)) {dir.create(OUTpath)}
################################################################################
######################## STEP 3: Parse EDF files ###############################
################################################################################
## Check the directories from above for existing edf files and also for existing
## Rdat files. We check for Edat files as well, to avod parsing already parsed
## files again.
edfs <- str_replace_all(list.files(path = EDFpath, pattern = EDFpattern),
'.edf', '')
Rdats <- str_replace_all(list.files(path = RDATpath, pattern = RDATpattern),
'.Rdat', '')
## convert EDFs that haven't been processed yet to Rdats. This step is Linux only!
# compare existing edfs with existing Rdats
unprocessed_edfs <- edfs[!str_detect(edfs, Rdats)]
if (length(unprocessed_edfs) > 0) {
# run a parallel for loop across remaining edfs. tip: you can change %dopar% to %do% to make this a regular for-loop
foreach(fname = unprocessed_edfs, .packages = c('data.table','edfR')) %dopar% {
curfile <- paste0(EDFpath, fname, '.edf')
# edf.all parses 'all' possible data.types in the file. You can also just
# parse parts of it. Usually that's not gonna be necessary, though.
curdat <- edf.all(curfile, samples = TRUE, eventmask = TRUE)
# The file header contains some meta-info (tracked eye, Hz, etc). get that:
curdat$recordingInfo <- edf.recordings(curfile)
curdat$recordingInfo <- curdat$recordingInfo[1,] # we only need the info once
# If you did EEG as well, you might want to also parse the TTL triggers.
curdat$ttl <- edf.events(curfile, type = "INPUTEVENT",
fields = c("sttime","input"))
save(file = paste0(RDATpath, fname, ".Rdat"), curdat)
}
# if we preprocessed some new files, check what Rdat files do exist now
Rdats <- str_replace_all(list.files(path = RDATpath, pattern = RDATpattern),
'.Rdat', '')
} else {
cat("Files seem to be converted already!")
}
################################################################################
######################## STEP 4: Extract Behavioral Data #######################
################################################################################
## Read the RDAT files, and process them:
cat('\nPerforming coregistration of Eyetracking and behavioral data for all ',
'subjects.\nThis might take a while...')
## For each subject, we loop over all messages sent to the Eyelink during a
## session. Once we find a message that starts with '<', we know that it's
## special, as we added that to all our special messaged during the experiment.
## We split up the tab-delimited messages following the '<' and extract the
## relevant data. From our experimental code, we know what infomation is at what
## position.
# note that we use <.combine = 'rbind'> here. That means, all results of the
# independent instances of the loop will be stacked by rows. Thus we can assign
# the single resulting data.table to one variable called 'samples'.
samples <- foreach(
fname = Rdats, .packages = c('data.table','pR'),
.combine = 'rbind') %do% {
# load the current subject file:
load(paste0(RDATpath, fname, ".Rdat"))
# convert to data.table for more efficient syntax, speed, and RAM use.
samples <- data.table(curdat$samples) #curdat is the data.table of the single subject's raw data
curdat$samples <- NULL # save RAM
setkey(samples, time) # keys samples by time (speed)
## Find out which eye was used and delete the nonsense columns of the other eye
if (curdat$recordingInfo$eye == 2) {
setnames(samples, c("Time", "Flags", "remove1", "remove2", "remove3", "X",
"Y", "Dil", "Blink", "Fixation", "Saccade"))
} else if (curdat$recordingInfo$eye == 1) {
setnames(samples, c("Time", "Flags", "X", "Y", "Dil", "remove1", "remove2",
"remove3", "Blink", "Fixation", "Saccade"))
} else {
stop("I cannot determine which Eye has been tracked\n") # talk to me if you catch this error. There's a seldom problem with a workaround...
}
# delete the nonsense columns of the other eye
samples[, c('remove1', 'remove2', 'remove3') := NULL]
## Assign trial information to all the samples. Register the start time of a trial,
## store the relevant variables, and then when we know the end time, assign these variables
## to the output data.table "samples".
numTrial <- 0
# in case this loop gives the warning "input string 1 is invalid in this
# locale", use Sys.setlocale(locale="C")
## now we loop over all messages, including all the stuff that's auto-sent
## by eyelink.
for (i in 1:nrow(curdat$messages)) {
# store the current message in e
e <- curdat$messages[i,]
# split it by tabs
msgSplit <- strsplit(e$msg,"\t")[[1]]
# our special informative messages have a common '>' as first character
# skip iteration if not present
if (msgSplit[1] != '>') {next}
# we only get here if it's a special message. now we need to check which
# one. That's usually written in the second argument. e.g., if you sent
# something like EyelinkSendTabMsg('TrlOn',ID,itrial) in Matlab, you'd be
# looking for 'TrlOn'
if (msgSplit[2] == 'TrlOn') {
# yay! we found the onset of a new trial
numTrial <- numTrial + 1
cat("Working on trial ", numTrial, "\t of file:", fname,
"\t on worker:", Sys.getpid(), "\n")
# as the messages are already aligned with the eyetrackig data, we can
# use the eyetrackers time vector to store the absolute time when this
# message was sent.
tmp.startT <- as.numeric(e$time)
foundNewTrl <- TRUE
}
## once we found that starting message, we keep on looping, trying to find
## more messages for the current trial.
if (foundNewTrl) {
if (msgSplit[2] == "FixOn") {
tmp.FixOn <- as.numeric(e$time)
}
if (msgSplit[2] == "FixOff") {
tmp.FixOff <- as.numeric(e$time)
}
## the "Info" message is, in my case, special for two reasons:
## 1. it contains all behavioral data
## 2. it is the last message in a trial and hence marks the end of a trial
## Because it marks the end of a trial, whenever we catch this message
## we add all our temporary variables to our output data.table
if (msgSplit[2] == "Info") {
tmp.SubjNr <- as.integer(msgSplit[3])
tmp.TrlNr <- as.integer(msgSplit[4]) # if on python, keep in mind that it starts counting from 0
tmp.endT <- as.numeric(e$time)
# add information coded in messages in long format to our 1000Hz samples-DT
# we now start & end time of the current trial in eyetracker-time,
# so we assign all behavioral values to the respective part of the
# huge data.table
samples[Time %between% list(startT, endT), ':='(
Subject = tmp.SubjNr,
Trial = tmp.TrlNr,
startTime = tmp.startT,
endTime = tmp.endT,
RelTime = Time - tmp.startT,
FixationOnset = tmp.FixOn - tmp.startT,
RespOnset = tmp.RespOn - tmp.startT,
RespOffset = tmp.RespOff - tmp.startT
)]
# Deleting the temporary variables after each trial makes sure we
# don't mix values between trials.
allvars <- ls()
tmpvars <- allvars[str_detect(allvars, 'tmp.')]
rm(list = tmpvars)
rm(allvars, tmpvars)
# we're done with the trial, so set this to false again, until we find
# another trial start
foundNewTrl = FALSE
}
}
}
############################################################################
######################## STEP 5: Massage Eyetracking Data ##################
############################################################################
## add mean and peak velocity per saccade
# set keys, so data.table can do a fast vector scan
setkey(samples, Time)
saccades <- as.data.table(curdat$saccades)
setkey(saccades, sttime, entime)
# extract only those saccades that happened during a trial
saccades <- saccades[sttime %in% samples[!is.na(Subject), Time] |
entime %in% samples[!is.na(Subject), Time],
]
cat("Adding saccade velocity to samples Data.Table\n")
for (i in 1:nrow(curdat$saccades)) {
samples[Time %between% list(saccades[i, sttime], saccades[i, entime]),
':='(
AverageVelocity = saccades[i, avel],
PeakVelocity = saccades[i, pvel],
IthSaccadeThisSubject = i)
]
}
## add TTL to long format
curdat$ttl <- as.data.table(curdat$ttl)
cat("\nAdding TTL triggers to samples data.table\n")
for (i in 1:nrow(curdat$ttl)) {
ttlOnset <- curdat$ttl[i, sttime]
ttl <- curdat$ttl[i, input]
samples[Time == ttlOnset, ':='(TTL = ttl)]
}
## remove temporary variables
rm('curdat', 'ttl', 'ttlOnset', 'saccades')
## Remove the samples that weren't between a TrialStart and a TrialEnd
## message (and hence don't have a subject-Nr). Conserves RAM & HDD
setkey(samples, Subject)
samples <- samples[!is.na(Subject),]
cat('\nDetecting blinks') # I found 20 to be a good value. Check the functions documentation if you want more info
samplesB <- detectblinks(samples, maxDeltaDilation = 20)
cat('\nExpanding blinks')
samplesB <- expandblinks(samplesB, 50)
cat('\nPerforming linear interpolation for expanded blinks')
samplesB <- interpolateblinks(samplesB)
cat('\nBaseline correction of pupil dilation data [baseline-period:',
baselineperiod[1], '-', baselineperiod[2], 'ms]')
samples <- normalize(samplesB, samplesB$RelTime >= baselineperiod[1] &
samplesB$RelTime <= baselineperiod[2])
## We don't need the absolute time. So call the time relative to trial onset
## 'Time'. Likewise overwrite non-normalized Dilation with the normalized one.
samples[, ':='(Time = RelTime, Dil = DilN)]
## Downsample data to outHz (as specified at the top). My fast_downsample()
## function is vectorized and therefore about 600x faster than
## wmR::my_downsample() and pR::downsample(). However, the input data.table
## needs a column called 'Trial'. If you don't have trials, simply add this
## column and make it all <1>.
cat('\ndownsampling...')
samples <- wmR::fast_downsample(samples,
by = c(
'Subject', 'Trial', 'startTime',
'endTime', 'RelTime','FixationOnset'),
Hz = outHz)
return(samples)
}
################################################################################
######################## STEP 6: Store Dataset #################################
################################################################################
cat('\nsaving...')
save(file = paste0(OUTpath, OUTprefix, "_AllSubjectEyeData.Rdat"), samples)
################################################################################
######################## STEP 7: Create a copy with behavioral data only #######
################################################################################
## create a copy of samples without eyetracking information. That is just
## behavioral data. This will save so much memory, we'll be able to run analyses
## on laptops...
cat('\nDownsampling for behavior only copy...')
samples <- samples[,.(Dil = mean(Dil, na.rm = T),
X = mean(X, na.rm = T),
Y = mean(Y, na.rm = T)),
by = .(Subject, Trial, startTime, RelTime, FixationOnset)]
cat('\nsaving...')
save(file = paste0(OUTpath, OUTprefix, "_AllSubjectBehavData.Rdat"), samples)
stopCluster(cl)
cat('\ndone!')