From 022f1d02eb2f9e9f473ec5157342291609345373 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 13:16:45 +0100 Subject: [PATCH 01/19] refactor: separate raw and aggregated processing --- R/read_embrace_plus.R | 45 ++++++++++++++++++++++++++++++++++++---- man/read_embrace_plus.Rd | 12 +++++++---- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index 1194215..e6fc721 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -28,6 +28,9 @@ get_timestamp_column <- function(start_time, sampling_freq, len_list, tz) { return(timestamp_df) } + + + #' Create dataframe for psychological factors #' @description Creates a dataframe for psychological factors #' @param data list of dataframes @@ -83,6 +86,9 @@ create_dataframes <- function(data, type, file, vars = c("x", "y", "z"), return(df) } + + + #' Read Embrace Plus data #' @description Reads in Embrace Plus data as a list (with EDA, HR, Temp, ACC, BVP, IBI as dataframes), and prepends timecolumns #' @details This function reads in a zipfile as exported by Embrace Plus. Then it extracts the zipfiles in a temporary folder @@ -93,16 +99,19 @@ create_dataframes <- function(data, type, file, vars = c("x", "y", "z"), #' The function returns an object of class "embrace_plus_data" with a prepended datetime columns. #' The object contains a list with dataframes from the physiological signals. #' -#' @param zipfile A zip file as exported by the instrument +#' @param zipfile A zip file as exported by the instrument. Can be aggregatd data, or raw data. +#' @param type The type of data contained in the zip file. Either "raw" or "aggregated". #' @param tz The timezone used by the instrument (defaults to user timezone). 
#' @examples -#' library(wearables) -#' # read_embrace_plus("yourpathtohezipfile.zip") +#' \dontrun{ +#' library(wearables) +#' read_embrace_plus("yourpathtohezipfile.zip") +#' } #' @export -#' @import sparklyr #' @import cli #' @importFrom dplyr pull read_embrace_plus <- function(zipfile, + type = "raw", tz = Sys.timezone()) { # Check if file exists @@ -110,6 +119,33 @@ read_embrace_plus <- function(zipfile, cli_abort("File does not exist") } + # Check type + if (!type %in% c("raw", "aggregated")) { + cli_abort("type must be either 'raw' or 'aggregated'") + } + + if (type == "raw") { + return(read_raw_embrace_plus(zipfile, tz)) + } + + if (type == "aggregated") { + return(read_aggregated_embrace_plus(zipfile, tz)) + } + +} + + + + +#' Extracts avro files from raw data +#' @description Processes .avro files +#' @param tz timezone +#' @keywords internal +#' @import sparklyr +#' @import cli +#' @noRd +read_raw_embrace_plus <- function(zipfile, tz) { + # Check for already installed Spark versions # if none available, install the latest version if (nrow(spark_available_versions()) == 0) { @@ -251,4 +287,5 @@ read_embrace_plus <- function(zipfile, tz = tz ) ) + } diff --git a/man/read_embrace_plus.Rd b/man/read_embrace_plus.Rd index 0ef0c84..7d8b36a 100644 --- a/man/read_embrace_plus.Rd +++ b/man/read_embrace_plus.Rd @@ -4,10 +4,12 @@ \alias{read_embrace_plus} \title{Read Embrace Plus data} \usage{ -read_embrace_plus(zipfile, tz = Sys.timezone()) +read_embrace_plus(zipfile, type = "raw", tz = Sys.timezone()) } \arguments{ -\item{zipfile}{A zip file as exported by the instrument} +\item{zipfile}{A zip file as exported by the instrument. Can be aggregatd data, or raw data.} + +\item{type}{The type of data contained in the zip file. 
Either "raw" or "aggregated".} \item{tz}{The timezone used by the instrument (defaults to user timezone).} } @@ -24,6 +26,8 @@ The function returns an object of class "embrace_plus_data" with a prepended dat The object contains a list with dataframes from the physiological signals. } \examples{ -library(wearables) -# read_embrace_plus("yourpathtohezipfile.zip") +\dontrun{ + library(wearables) + read_embrace_plus("yourpathtohezipfile.zip") +} } From 23e02dab9ce558e2cb68fc410b0cc83affa5ca44 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 13:58:09 +0100 Subject: [PATCH 02/19] add `read_aggregated_embrace_plus` --- R/read_embrace_plus.R | 98 ++++++++++++++++++++++++++++++++++------ man/read_embrace_plus.Rd | 2 +- 2 files changed, 86 insertions(+), 14 deletions(-) diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index e6fc721..e351567 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -89,12 +89,41 @@ create_dataframes <- function(data, type, file, vars = c("x", "y", "z"), + +#' Unzip files and store files in temporary directory +#' @description Extracts avro or csv files from a zip file +#' @param zipfile path to the zip file +#' @param type type of file to extract +#' @keywords internal +#' @noRd +unzip_embrace_plus <- function(zipfile, type) { + + # Extract files to a temporary folder + path <- paste0(tempdir(), "/extracted") + + # if path exists, remove content + if (dir.exists(path)) { + unlink(path, recursive = TRUE) + } + + unzip(zipfile = zipfile, + exdir = path) + + files <- list.files(path, recursive = TRUE, pattern = sprintf("[.]%s$", type), full.names = TRUE) + + return(files) +} + + + + + #' Read Embrace Plus data #' @description Reads in Embrace Plus data as a list (with EDA, HR, Temp, ACC, BVP, IBI as dataframes), and prepends timecolumns #' @details This function reads in a zipfile as exported by Embrace Plus. 
Then it extracts the zipfiles in a temporary folder #' and unzips them in the same temporary folder. #' -#' The unzipped files are avro files, and are read in with using `sparklyr`, which sets up a local Spark cluster. +#' The unzipped files are avro or csv files, where avro files are read in with using `sparklyr`, which sets up a local Spark cluster. #' #' The function returns an object of class "embrace_plus_data" with a prepended datetime columns. #' The object contains a list with dataframes from the physiological signals. @@ -137,6 +166,60 @@ read_embrace_plus <- function(zipfile, +#' Extract csv files from data +#' @description Processes .csv files +#' @param tz timezone +#' @keywords internal +#' @import cli +#' @noRd +read_aggregated_embrace_plus <- function(zipfile, tz) { + + # e4 reference: c("EDA", "ACC", "TEMP", "HR", "BVP") + + csv_files <- unzip_embrace_plus(zipfile, "csv") + + # Get the content before .csv and after the last _ (but include -) + dataset_names <- gsub(".*?([A-Za-z0-9\\-]+)[.]csv", "\\1", csv_files) + dataset_names <- toupper(dataset_names) + dataset_names <- gsub("TEMPERATURE", "TEMP", dataset_names) + dataset_names <- gsub("SLEEP-DETECTION", "SLEEP", dataset_names) + dataset_names <- gsub("PULSE-RATE", "HR", dataset_names) + dataset_names <- gsub("MOVEMENT-INTENSITY", "MOVE", dataset_names) + dataset_names <- gsub("RESPIRATORY-RATE", "RR", dataset_names) + dataset_names <- gsub("WEARING-DETECTION", "WEAR", dataset_names) + csv_files <- setNames(csv_files, dataset_names) + + csv_list <- list() + + for (i in 1:length(csv_files)) { + + file <- csv_files[i] + + this_file <- read.csv(file, stringsAsFactors = FALSE) + + rename_cols <- list(c("timestamp_iso", "DateTime"), + c("timestamp_unix", "unix_timestamp"), + c("eda_scl_usiemens", "EDA")) + + for (j in rename_cols) { + if (j[[1]] %in% colnames(this_file)) { + names(this_file)[names(this_file) == j[[1]]] <- j[[2]] + } + } + + # further pre-processing + this_file$DateTime <- 
as.POSIXct(gsub("T|Z", " ", this_file$DateTime), tz = tz) + + csv_list[[names(file)]] <- this_file + + } + + return(csv_list) + +} + + + #' Extracts avro files from raw data #' @description Processes .avro files #' @param tz timezone @@ -161,18 +244,7 @@ read_raw_embrace_plus <- function(zipfile, tz) { packages = "org.apache.spark:spark-avro_2.12:3.5.0") cli_alert_success("Connected!") - # Extract files to a temporary folder - path <- paste0(tempdir(), "/extracted") - - # if path exists, remove content - if (dir.exists(path)) { - unlink(path, recursive = TRUE) - } - - unzip(zipfile = zipfile, - exdir = path) - - avro_files <- list.files(path, recursive = TRUE, pattern = "[.]avro$", full.names = TRUE) + avro_files <- unzip_embrace_plus(zipfile, type = "avro") cli_alert_info("About to start processing {length(avro_files)} avro file{?s}") diff --git a/man/read_embrace_plus.Rd b/man/read_embrace_plus.Rd index 7d8b36a..9920b6e 100644 --- a/man/read_embrace_plus.Rd +++ b/man/read_embrace_plus.Rd @@ -20,7 +20,7 @@ Reads in Embrace Plus data as a list (with EDA, HR, Temp, ACC, BVP, IBI as dataf This function reads in a zipfile as exported by Embrace Plus. Then it extracts the zipfiles in a temporary folder and unzips them in the same temporary folder. -The unzipped files are avro files, and are read in with using `sparklyr`, which sets up a local Spark cluster. +The unzipped files are avro or csv files, where avro files are read in with using `sparklyr`, which sets up a local Spark cluster. The function returns an object of class "embrace_plus_data" with a prepended datetime columns. The object contains a list with dataframes from the physiological signals. 
From cfd4f65d8476f3a215f84f74ac14ef091575d992 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 14:13:23 +0100 Subject: [PATCH 03/19] make `unzip_files` a utils function --- R/read_embrace_plus.R | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index e351567..a74f492 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -89,35 +89,6 @@ create_dataframes <- function(data, type, file, vars = c("x", "y", "z"), - -#' Unzip files and store files in temporary directory -#' @description Extracts avro or csv files from a zip file -#' @param zipfile path to the zip file -#' @param type type of file to extract -#' @keywords internal -#' @noRd -unzip_embrace_plus <- function(zipfile, type) { - - # Extract files to a temporary folder - path <- paste0(tempdir(), "/extracted") - - # if path exists, remove content - if (dir.exists(path)) { - unlink(path, recursive = TRUE) - } - - unzip(zipfile = zipfile, - exdir = path) - - files <- list.files(path, recursive = TRUE, pattern = sprintf("[.]%s$", type), full.names = TRUE) - - return(files) -} - - - - - #' Read Embrace Plus data #' @description Reads in Embrace Plus data as a list (with EDA, HR, Temp, ACC, BVP, IBI as dataframes), and prepends timecolumns #' @details This function reads in a zipfile as exported by Embrace Plus. 
Then it extracts the zipfiles in a temporary folder @@ -176,7 +147,7 @@ read_aggregated_embrace_plus <- function(zipfile, tz) { # e4 reference: c("EDA", "ACC", "TEMP", "HR", "BVP") - csv_files <- unzip_embrace_plus(zipfile, "csv") + csv_files <- unzip_files(zipfile, "csv") # Get the content before .csv and after the last _ (but include -) dataset_names <- gsub(".*?([A-Za-z0-9\\-]+)[.]csv", "\\1", csv_files) @@ -244,7 +215,7 @@ read_raw_embrace_plus <- function(zipfile, tz) { packages = "org.apache.spark:spark-avro_2.12:3.5.0") cli_alert_success("Connected!") - avro_files <- unzip_embrace_plus(zipfile, type = "avro") + avro_files <- unzip_files(zipfile, type = "avro") cli_alert_info("About to start processing {length(avro_files)} avro file{?s}") From 743a04ddca8af982ae09021b169f1777a60dbf19 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 14:28:36 +0100 Subject: [PATCH 04/19] add unzip utils function --- R/utils.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/R/utils.R b/R/utils.R index 170a476..d92dfc8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -112,3 +112,28 @@ create_empty_freq_list <- function() { return(freq_empty_list) } + + +#' Unzip files and store files in temporary directory +#' @description Extracts avro or csv files from a zip file +#' @param zipfile path to the zip file +#' @param type type of file to extract +#' @keywords internal +#' @noRd +unzip_files <- function(zipfile, type) { + + # Extract files to a temporary folder + path <- paste0(tempdir(), "/extracted") + + # if path exists, remove content + if (dir.exists(path)) { + unlink(path, recursive = TRUE) + } + + unzip(zipfile = zipfile, + exdir = path) + + files <- list.files(path, recursive = TRUE, pattern = sprintf("[.]%s$", type), full.names = TRUE) + + return(files) +} From fe3736807786df789add8ed46c55ed23d5f78e9d Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 14:28:54 +0100 Subject: [PATCH 05/19] add `read_nowatch` 
function --- NAMESPACE | 1 + R/read_nowatch.R | 72 +++++++++++++++++++++++++++++++++++++++++++++ man/read_nowatch.Rd | 33 +++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 R/read_nowatch.R create mode 100644 man/read_nowatch.Rd diff --git a/NAMESPACE b/NAMESPACE index ebd76d1..4a82b06 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(read_and_process_e4) export(read_and_process_embrace_plus) export(read_e4) export(read_embrace_plus) +export(read_nowatch) export(upsample_data_to_8Hz) export(write_processed_e4) import(cli) diff --git a/R/read_nowatch.R b/R/read_nowatch.R new file mode 100644 index 0000000..c492459 --- /dev/null +++ b/R/read_nowatch.R @@ -0,0 +1,72 @@ +#' Read Nowatch data +#' @description Reads in Nowatch data as a list, and prepends timecolumns +#' @details This function reads in a zipfile as exported by Nowatch. Then it extracts the zipfiles in a temporary folder +#' and unzips them in the same temporary folder. +#' +#' The unzipped files are csv files. +#' +#' The function returns an object of class "nowatch_data" with a prepended datetime columns. +#' The object contains a list with dataframes from the physiological signals. +#' +#' @param zipfile A zip file as exported by the instrument. Only aggregated data supported. +#' @param type The type of data contained in the zip file. +#' @param tz The timezone used by the instrument (defaults to user timezone). 
+#' @examples +#' \dontrun{ +#' library(wearables) +#' read_nowatch("yourpathtohezipfile.zip") +#' } +#' @export +#' @import cli +read_nowatch <- function(zipfile, + tz = Sys.timezone()) { + + # Check if file exists + if (!file.exists(zipfile)) { + cli_abort("File does not exist") + } + + csv_files <- unzip_files(zipfile, "csv") + + # Get the content before .csv and after the last _ (but include -) + dataset_names <- gsub(".*?([A-Za-z0-9\\-]+)[.]csv", "\\1", csv_files) + dataset_names <- toupper(dataset_names) + dataset_names <- gsub("ACTIVITYTYPE", "ACT", dataset_names) + dataset_names <- gsub("CADENCE", "CAD", dataset_names) + dataset_names <- gsub("TEMPERATURE", "TEMP", dataset_names) + dataset_names <- gsub("CORTISOLLEVELS", "CORTL", dataset_names) + dataset_names <- gsub("HEARTBEATS", "HBR", dataset_names) + dataset_names <- gsub("HEARTRATE", "HR", dataset_names) + dataset_names <- gsub("RESPIRATIONRATE", "RR", dataset_names) + dataset_names <- gsub("SLEEPSESSION", "SLEEP", dataset_names) + dataset_names <- gsub("STRESSLEVEL", "STRESS", dataset_names) + csv_files <- setNames(csv_files, dataset_names) + + csv_list <- list() + + for (i in 1:length(csv_files)) { + + file <- csv_files[i] + + this_file <- read.csv(file, stringsAsFactors = FALSE) + + rename_cols <- list(c("timestamp", "unix_timestamp"), + c("value", names(file))) + + for (j in rename_cols) { + if (j[[1]] %in% colnames(this_file)) { + names(this_file)[names(this_file) == j[[1]]] <- j[[2]] + } + } + + # convert unix timestamp to as.POSIXct + if ("unix_timestamp" %in% colnames(this_file)) { + this_file$DateTime <- as.POSIXct(this_file$unix_timestamp, origin = "1970-01-01", tz = tz) + } + + csv_list[[names(file)]] <- this_file + + } + + return(csv_list) +} \ No newline at end of file diff --git a/man/read_nowatch.Rd b/man/read_nowatch.Rd new file mode 100644 index 0000000..e168bfc --- /dev/null +++ b/man/read_nowatch.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit 
documentation in R/read_nowatch.R +\name{read_nowatch} +\alias{read_nowatch} +\title{Read Nowatch data} +\usage{ +read_nowatch(zipfile, tz = Sys.timezone()) +} +\arguments{ +\item{zipfile}{A zip file as exported by the instrument. Only aggregated data supported.} + +\item{tz}{The timezone used by the instrument (defaults to user timezone).} + +\item{type}{The type of data contained in the zip file.} +} +\description{ +Reads in Nowatch data as a list, and prepends timecolumns +} +\details{ +This function reads in a zipfile as exported by Nowatch. Then it extracts the zipfiles in a temporary folder +and unzips them in the same temporary folder. + +The unzipped files are csv files. + +The function returns an object of class "nowatch_data" with a prepended datetime columns. +The object contains a list with dataframes from the physiological signals. +} +\examples{ +\dontrun{ + library(wearables) + read_nowatch("yourpathtohezipfile.zip") +} +} From d2fe257991287641230096d61f6d306727e9376c Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 14:36:55 +0100 Subject: [PATCH 06/19] bump version number --- DESCRIPTION | 6 +++--- NEWS.md | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3d1ab4d..5590e21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: wearables Title: Tools to Read and Convert Wearables Data -Version: 0.10.0 -Date: 2024-02-27 +Version: 0.11.0 +Date: 2024-03-28 Authors@R: c( person("Peter", "de Looff", , "peterdelooff@gmail.com", role = c("aut", "cre")), person("Remko", "Duursma", role = "aut"), @@ -14,7 +14,7 @@ Authors@R: c( person("Veerle", "van Leemput", role = "ctb") ) Maintainer: Peter de Looff -Description: Package to read Empatica E4 data, perform several +Description: Package to read Empatica E4, Embrace Plus, and Nowatch data, perform several transformations, perform signal processing and analyses, including batch analyses. 
License: GPL-2 diff --git a/NEWS.md b/NEWS.md index 55eae71..8468697 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# wearables 0.11.0 +2024-03-28 + +## New features +* Added new function to read data from Nowatch: `read_nowatch()`. This function will return a list of dataframes with the data from the Nowatch. +* Added reading of aggregated data from Embrace Plus by adding a `type` argument in `read_embrace_plus()`. This argument can be set to `raw` or `aggregated` to read the raw or aggregated data from the Embrace Plus. + # wearables 0.10.0 2024-02-27 From e16479f55825b52a375f9e96019512829bc7c018 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 16:21:39 +0100 Subject: [PATCH 07/19] refactor: generalize `aggregate_data()` function for multiple devices --- NAMESPACE | 1 + NEWS.md | 1 + R/aggregate_data.R | 89 ++++++++++++++++--------------------------- man/aggregate_data.Rd | 23 +++++++---- 4 files changed, 50 insertions(+), 64 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4a82b06..a173cc8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(print,e4data) export(aggregate_data) export(aggregate_e4_data) export(aggregate_embrace_plus_data) +export(aggregate_nowatch_data) export(as_time) export(as_timeseries) export(batch_analysis) diff --git a/NEWS.md b/NEWS.md index 8468697..a662bd6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ ## New features * Added new function to read data from Nowatch: `read_nowatch()`. This function will return a list of dataframes with the data from the Nowatch. * Added reading of aggregated data from Embrace Plus by adding a `type` argument in `read_embrace_plus()`. This argument can be set to `raw` or `aggregated` to read the raw or aggregated data from the Embrace Plus. +* `aggregate_data()` and friends (`aggregate_e4_data()`, `aggregate_embrace_plus_data()`, `aggregate_nowatch()`) now have a `interval` argument. It defaults to `"1 min"`, but can be changed if desired. 
# wearables 0.10.0 2024-02-27 diff --git a/R/aggregate_data.R b/R/aggregate_data.R index 04f7ba6..a47955e 100644 --- a/R/aggregate_data.R +++ b/R/aggregate_data.R @@ -1,59 +1,22 @@ -#' Aggregate data into 1min timestamps -#' @param x An object read by \code{\link{read_e4}} or \code{\link{read_embrace_plus}}. +#' Aggregate data into timesteps +#' @param x An object read by \code{\link{read_e4}}, \code{\link{read_embrace_plus}} or \code{\link{read_nowatch}}. +#' @param interval The interval to aggregate the data. Default is 1 min. #' @export -aggregate_data <- function(x) { +aggregate_data <- function(x, interval = "1 min") { - if ("EDA" %in% names(x)) { + for (name in names(x)) { - x$EDA <- padr::thicken(x$EDA, - interval = "1 min", - colname = "datetime_1min" - ) %>% - dplyr::group_by(datetime_1min) %>% - summarize(EDA = mean(EDA)) %>% - dplyr::rename(DateTime = datetime_1min) - - } - - if ("ACC" %in% names(x)) { - - x$ACC <- padr::thicken(x$ACC, - interval = "1 min", - colname = "datetime_1min" - ) %>% - group_by(datetime_1min) %>% - summarize( - x = mean(x), - y = mean(y), - z = mean(z), - a = mean(a) - ) %>% - dplyr::rename(DateTime = datetime_1min) - - } - - if ("TEMP" %in% names(x)) { - - x$TEMP <- padr::thicken(x$TEMP, - interval = "1 min", - colname = "datetime_1min" - ) %>% - group_by(datetime_1min) %>% - summarize(TEMP = mean(TEMP)) %>% - dplyr::rename(DateTime = datetime_1min) + if (nrow(x[[name]]) == 0 || !any(c("DateTime", "datetime_1min") %in% colnames(x[[name]]))) { + next + } - } - - if ("HR" %in% names(x)) { - - x$HR <- padr::thicken(x$HR, - interval = "1 min", - colname = "datetime_1min" + x[[name]] <- padr::thicken(x[[name]], + interval = interval, + colname = "datetime_1min" ) %>% - group_by(datetime_1min) %>% - summarize(HR = mean(HR)) %>% + dplyr::group_by(datetime_1min) %>% + dplyr::summarise(across(where(is.numeric), mean)) %>% dplyr::rename(DateTime = datetime_1min) - } x$BVP <- NULL @@ -66,31 +29,45 @@ aggregate_data <- function(x) { } -#' 
Aggregate E4 data into 1min timesteps +#' Aggregate E4 data into timesteps #' @rdname aggregate_data #' @export -aggregate_e4_data <- function(x) { +aggregate_e4_data <- function(x, interval = "1 min") { if (is.null(x$EDA)) { warning("Data not found. Did you run rbind_e4()?") } - x <- aggregate_data(x) + x <- aggregate_data(x, interval = interval) return(x) } -#' Aggregate Embrace Plus data into 1min timesteps +#' Aggregate Embrace Plus data into timesteps #' @rdname aggregate_data #' @export -aggregate_embrace_plus_data <- function(x) { +aggregate_embrace_plus_data <- function(x, interval = "1 min") { if (is.null(x$EDA)) { warning("Data not found. Did you run rbind_embrace_plus()?") } - x <- aggregate_data(x) + x <- aggregate_data(x, interval = interval) + + return(x) +} + +#' Aggregate Nowatch data into timesteps +#' @rdname aggregate_data +#' @export +aggregate_nowatch_data <- function(x, interval = "1 min") { + + if (is.null(x$EDA)) { + warning("Data not found. Did you use read_nowatch()?") + } + + x <- aggregate_data(x, interval = interval) return(x) } diff --git a/man/aggregate_data.Rd b/man/aggregate_data.Rd index 337d918..825b814 100644 --- a/man/aggregate_data.Rd +++ b/man/aggregate_data.Rd @@ -4,21 +4,28 @@ \alias{aggregate_data} \alias{aggregate_e4_data} \alias{aggregate_embrace_plus_data} -\title{Aggregate data into 1min timestamps} +\alias{aggregate_nowatch_data} +\title{Aggregate data into timesteps} \usage{ -aggregate_data(x) +aggregate_data(x, interval = "1 min") -aggregate_e4_data(x) +aggregate_e4_data(x, interval = "1 min") -aggregate_embrace_plus_data(x) +aggregate_embrace_plus_data(x, interval = "1 min") + +aggregate_nowatch_data(x, interval = "1 min") } \arguments{ -\item{x}{An object read by \code{\link{read_e4}} or \code{\link{read_embrace_plus}}.} +\item{x}{An object read by \code{\link{read_e4}}, \code{\link{read_embrace_plus}} or \code{\link{read_nowatch}}.} + +\item{interval}{The interval to aggregate the data. 
Default is 1 min.} } \description{ -Aggregate data into 1min timestamps +Aggregate data into timesteps + +Aggregate E4 data into timesteps -Aggregate E4 data into 1min timesteps +Aggregate Embrace Plus data into timesteps -Aggregate Embrace Plus data into 1min timesteps +Aggregate Nowatch data into timesteps } From f7e1c0883abfbea369104eddbb4aeed19d71d364 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 16:21:50 +0100 Subject: [PATCH 08/19] add classes --- R/read_embrace_plus.R | 18 +++++++++--------- R/read_nowatch.R | 7 ++++++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index a74f492..a91f514 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -131,7 +131,7 @@ read_embrace_plus <- function(zipfile, if (type == "aggregated") { return(read_aggregated_embrace_plus(zipfile, tz)) } - + } @@ -185,7 +185,13 @@ read_aggregated_embrace_plus <- function(zipfile, tz) { } - return(csv_list) + return( + structure(csv_list, + class = "embraceplusdata", + zipfile = tools::file_path_sans_ext(zipfile), + tz = tz + ) + ) } @@ -323,12 +329,6 @@ read_raw_embrace_plus <- function(zipfile, tz) { # Disconnect from the Spark cluster spark_disconnect(sc) - return( - structure(avro_list, - class = "embraceplusdata", - zipfile = tools::file_path_sans_ext(zipfile), - tz = tz - ) - ) + return(avro_list) } diff --git a/R/read_nowatch.R b/R/read_nowatch.R index c492459..14ce126 100644 --- a/R/read_nowatch.R +++ b/R/read_nowatch.R @@ -68,5 +68,10 @@ read_nowatch <- function(zipfile, } - return(csv_list) + return( + structure(csv_list, + class = "nowatchdata", + zipfile = tools::file_path_sans_ext(zipfile), + tz = tz + )) } \ No newline at end of file From 2b872418f4896e98e0ba391cb5df31d545a5f9fb Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 16:23:44 +0100 Subject: [PATCH 09/19] remove comment --- R/read_embrace_plus.R | 2 -- 1 file changed, 2 deletions(-) diff 
--git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index a91f514..a6410bd 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -145,8 +145,6 @@ read_embrace_plus <- function(zipfile, #' @noRd read_aggregated_embrace_plus <- function(zipfile, tz) { - # e4 reference: c("EDA", "ACC", "TEMP", "HR", "BVP") - csv_files <- unzip_files(zipfile, "csv") # Get the content before .csv and after the last _ (but include -) From 63d7c27b587343def8012aa4b78631734a70b937 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 28 Mar 2024 16:26:38 +0100 Subject: [PATCH 10/19] remove `type` for nowatch --- R/read_nowatch.R | 1 - man/read_nowatch.Rd | 2 -- 2 files changed, 3 deletions(-) diff --git a/R/read_nowatch.R b/R/read_nowatch.R index 14ce126..7263f99 100644 --- a/R/read_nowatch.R +++ b/R/read_nowatch.R @@ -9,7 +9,6 @@ #' The object contains a list with dataframes from the physiological signals. #' #' @param zipfile A zip file as exported by the instrument. Only aggregated data supported. -#' @param type The type of data contained in the zip file. #' @param tz The timezone used by the instrument (defaults to user timezone). #' @examples #' \dontrun{ diff --git a/man/read_nowatch.Rd b/man/read_nowatch.Rd index e168bfc..2b9cb4b 100644 --- a/man/read_nowatch.Rd +++ b/man/read_nowatch.Rd @@ -10,8 +10,6 @@ read_nowatch(zipfile, tz = Sys.timezone()) \item{zipfile}{A zip file as exported by the instrument. 
Only aggregated data supported.} \item{tz}{The timezone used by the instrument (defaults to user timezone).} - -\item{type}{The type of data contained in the zip file.} } \description{ Reads in Nowatch data as a list, and prepends timecolumns From 25827ea43ae207d3b32a68db339752c67d8d076c Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 18 Apr 2024 09:54:10 +0200 Subject: [PATCH 11/19] allow folder input in `read_embrace_plus` --- NEWS.md | 1 + R/read_embrace_plus.R | 55 ++++++++++++++++++++++++++++++++-------- man/read_embrace_plus.Rd | 14 +++++++--- 3 files changed, 56 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index a662bd6..8742ec4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ ## New features * Added new function to read data from Nowatch: `read_nowatch()`. This function will return a list of dataframes with the data from the Nowatch. * Added reading of aggregated data from Embrace Plus by adding a `type` argument in `read_embrace_plus()`. This argument can be set to `raw` or `aggregated` to read the raw or aggregated data from the Embrace Plus. +* `read_embrace_plus()` gains additional argument `folder` to choose the folder where the data is stored. This prevents the user to have to navigate to compress the files before using this function. It is still possible to use a zip file as input with the `zipfile` argument. * `aggregate_data()` and friends (`aggregate_e4_data()`, `aggregate_embrace_plus_data()`, `aggregate_nowatch()`) now have a `interval` argument. It defaults to `"1 min"`, but can be changed if desired. # wearables 0.10.0 diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index a6410bd..9429673 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -99,37 +99,49 @@ create_dataframes <- function(data, type, file, vars = c("x", "y", "z"), #' The function returns an object of class "embrace_plus_data" with a prepended datetime columns. 
#' The object contains a list with dataframes from the physiological signals. #' -#' @param zipfile A zip file as exported by the instrument. Can be aggregatd data, or raw data. +#' @param zipfile A zip file as exported by the instrument. Can be aggregated data, or raw data. +#' @param folder A folder with the unzipped files. If this is provided, the zipfile is not used. #' @param type The type of data contained in the zip file. Either "raw" or "aggregated". #' @param tz The timezone used by the instrument (defaults to user timezone). #' @examples #' \dontrun{ #' library(wearables) -#' read_embrace_plus("yourpathtohezipfile.zip") +#' read_embrace_plus(zipfile = "yourpathtohezipfile.zip") +#' read_embrace_plus(folder = "/path/to/folder/with/files", type = "aggregated") #' } #' @export #' @import cli #' @importFrom dplyr pull -read_embrace_plus <- function(zipfile, +read_embrace_plus <- function(zipfile = NULL, + folder = NULL, type = "raw", tz = Sys.timezone()) { - # Check if file exists - if (!file.exists(zipfile)) { + # Check if zipfile or folder is provided + if (is.null(zipfile) && is.null(folder)) { + cli_abort("Either zipfile or folder must be provided") + } + + # Check if file or folder exist + if (!is.null(zipfile) && !file.exists(zipfile)) { cli_abort("File does not exist") } + if (!is.null(folder) && !dir.exists(folder)) { + cli_abort("Folder does not exist") + } + # Check type if (!type %in% c("raw", "aggregated")) { cli_abort("type must be either 'raw' or 'aggregated'") } if (type == "raw") { - return(read_raw_embrace_plus(zipfile, tz)) + return(read_raw_embrace_plus(zipfile, folder, tz)) } if (type == "aggregated") { - return(read_aggregated_embrace_plus(zipfile, tz)) + return(read_aggregated_embrace_plus(zipfile, folder, tz)) } } @@ -139,13 +151,26 @@ read_embrace_plus <- function(zipfile, #' Extract csv files from data #' @description Processes .csv files +#' @param zipfile path to zipfile +#' @param folder path to folder #' @param tz timezone #' 
@keywords internal #' @import cli #' @noRd -read_aggregated_embrace_plus <- function(zipfile, tz) { +read_aggregated_embrace_plus <- function(zipfile = NULL, folder = NULL, tz) { + + if (!is.null(zipfile)) { + csv_files <- unzip_files(zipfile, "csv") + } - csv_files <- unzip_files(zipfile, "csv") + if (!is.null(folder)) { + # check if there is a subdirectory first + if (length(list.files(folder, full.names = TRUE)) == 1) { + folder <- list.files(folder, full.names = TRUE) + } + + csv_files <- list.files(folder, pattern = "[.]csv$", full.names = TRUE) + } # Get the content before .csv and after the last _ (but include -) dataset_names <- gsub(".*?([A-Za-z0-9\\-]+)[.]csv", "\\1", csv_files) @@ -197,12 +222,14 @@ read_aggregated_embrace_plus <- function(zipfile = NULL, folder = NULL, tz) { #' Extracts avro files from raw data #' @description Processes .avro files +#' @param zipfile zip file +#' @param folder folder #' @param tz timezone #' @keywords internal #' @import sparklyr #' @import cli #' @noRd -read_raw_embrace_plus <- function(zipfile, tz) { +read_raw_embrace_plus <- function(zipfile = NULL, folder = NULL , tz) { # Check for already installed Spark versions # if none available, install the latest version @@ -219,7 +246,13 @@ read_raw_embrace_plus <- function(zipfile, tz) { packages = "org.apache.spark:spark-avro_2.12:3.5.0") cli_alert_success("Connected!") - avro_files <- unzip_files(zipfile, type = "avro") + if (!is.null(zipfile)) { + avro_files <- unzip_files(zipfile, type = "avro") + } + + if (!is.null(folder)) { + avro_files <- list.files(folder, pattern = "[.]avro$", full.names = TRUE) + } cli_alert_info("About to start processing {length(avro_files)} avro file{?s}") diff --git a/man/read_embrace_plus.Rd b/man/read_embrace_plus.Rd index 9920b6e..6a263d0 100644 --- a/man/read_embrace_plus.Rd +++ b/man/read_embrace_plus.Rd @@ -4,10 +4,17 @@ \alias{read_embrace_plus} \title{Read Embrace Plus data} \usage{ -read_embrace_plus(zipfile, type = "raw", tz = Sys.timezone()) 
+read_embrace_plus( + zipfile = NULL, + folder = NULL, + type = "raw", + tz = Sys.timezone() +) } \arguments{ -\item{zipfile}{A zip file as exported by the instrument. Can be aggregatd data, or raw data.} +\item{zipfile}{A zip file as exported by the instrument. Can be aggregated data, or raw data.} + +\item{folder}{A folder with the unzipped files. If this is provided, the zipfile is not used.} \item{type}{The type of data contained in the zip file. Either "raw" or "aggregated".} @@ -28,6 +35,7 @@ The object contains a list with dataframes from the physiological signals. \examples{ \dontrun{ library(wearables) - read_embrace_plus("yourpathtohezipfile.zip") + read_embrace_plus(zipfile = "yourpathtohezipfile.zip") + read_embrace_plus(folder = "/path/to/folder/with/files", type = "aggregated") } } From c367d0a77f13d00ee38fdd7e89d62910512be2ab Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 18 Apr 2024 10:10:48 +0200 Subject: [PATCH 12/19] check activity data in Nowatch object --- R/aggregate_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_data.R b/R/aggregate_data.R index a47955e..7705e8c 100644 --- a/R/aggregate_data.R +++ b/R/aggregate_data.R @@ -63,7 +63,7 @@ aggregate_embrace_plus_data <- function(x, interval = "1 min") { #' @export aggregate_nowatch_data <- function(x, interval = "1 min") { - if (is.null(x$EDA)) { + if (is.null(x$ACT)) { warning("Data not found. 
Did you use read_nowatch()?") } From d4c9ecdc7c7c59ad6c82b771716bd1bff96a6d06 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 18 Apr 2024 10:11:28 +0200 Subject: [PATCH 13/19] docs: bump version --- DESCRIPTION | 2 +- NEWS.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5590e21..de2057b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Type: Package Package: wearables Title: Tools to Read and Convert Wearables Data Version: 0.11.0 -Date: 2024-03-28 +Date: 2024-04-18 Authors@R: c( person("Peter", "de Looff", , "peterdelooff@gmail.com", role = c("aut", "cre")), person("Remko", "Duursma", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 8742ec4..27800a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,8 @@ # wearables 0.11.0 -2024-03-28 +2024-04-18 ## New features -* Added new function to read data from Nowatch: `read_nowatch()`. This function will return a list of dataframes with the data from the Nowatch. +* Added new function to read data from Nowatch: `read_nowatch()`. This function will return a list of dataframes with the data from the Nowatch. Both a zipfile and a folder are accepted as input. This function is intended for aggregated data, as the Nowatch does not provide raw data. * Added reading of aggregated data from Embrace Plus by adding a `type` argument in `read_embrace_plus()`. This argument can be set to `raw` or `aggregated` to read the raw or aggregated data from the Embrace Plus. * `read_embrace_plus()` gains additional argument `folder` to choose the folder where the data is stored. This prevents the user to have to navigate to compress the files before using this function. It is still possible to use a zip file as input with the `zipfile` argument. * `aggregate_data()` and friends (`aggregate_e4_data()`, `aggregate_embrace_plus_data()`, `aggregate_nowatch()`) now have a `interval` argument. It defaults to `"1 min"`, but can be changed if desired. 
From 5f53901b42eb14115a624050b825fe5d953f4d91 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 18 Apr 2024 10:11:42 +0200 Subject: [PATCH 14/19] docs: change description --- R/read_embrace_plus.R | 4 ++-- man/read_embrace_plus.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index 9429673..f7dcc60 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -91,8 +91,8 @@ create_dataframes <- function(data, type, file, vars = c("x", "y", "z"), #' Read Embrace Plus data #' @description Reads in Embrace Plus data as a list (with EDA, HR, Temp, ACC, BVP, IBI as dataframes), and prepends timecolumns -#' @details This function reads in a zipfile as exported by Embrace Plus. Then it extracts the zipfiles in a temporary folder -#' and unzips them in the same temporary folder. +#' @details This function reads in a zipfile with data from the Embrace Plus device, or +#' a folder with unzipped files. The unzipped files are avro or csv files. #' #' The unzipped files are avro or csv files, where avro files are read in with using `sparklyr`, which sets up a local Spark cluster. #' diff --git a/man/read_embrace_plus.Rd b/man/read_embrace_plus.Rd index 6a263d0..c88599f 100644 --- a/man/read_embrace_plus.Rd +++ b/man/read_embrace_plus.Rd @@ -24,8 +24,8 @@ read_embrace_plus( Reads in Embrace Plus data as a list (with EDA, HR, Temp, ACC, BVP, IBI as dataframes), and prepends timecolumns } \details{ -This function reads in a zipfile as exported by Embrace Plus. Then it extracts the zipfiles in a temporary folder -and unzips them in the same temporary folder. +This function reads in a zipfile with data from the Embrace Plus device, or +a folder with unzipped files. The unzipped files are avro or csv files. The unzipped files are avro or csv files, where avro files are read in with using `sparklyr`, which sets up a local Spark cluster. 
From bcceb7531a6cd3d61d9096069a7cd4f69133fa60 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 18 Apr 2024 10:11:53 +0200 Subject: [PATCH 15/19] allow folder as input --- R/read_nowatch.R | 32 ++++++++++++++++++++++++++------ man/read_nowatch.Rd | 9 ++++++--- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/R/read_nowatch.R b/R/read_nowatch.R index 7263f99..05e84c6 100644 --- a/R/read_nowatch.R +++ b/R/read_nowatch.R @@ -1,7 +1,7 @@ #' Read Nowatch data #' @description Reads in Nowatch data as a list, and prepends timecolumns -#' @details This function reads in a zipfile as exported by Nowatch. Then it extracts the zipfiles in a temporary folder -#' and unzips them in the same temporary folder. +#' @details This function reads in a zipfile with files exported by the Nowatch instrument, +#' or a folder with the unzipped files. The files are expected to be csv files. #' #' The unzipped files are csv files. #' @@ -9,23 +9,43 @@ #' The object contains a list with dataframes from the physiological signals. #' #' @param zipfile A zip file as exported by the instrument. Only aggregated data supported. +#' @param folder A folder with the unzipped files. If this is provided, the zipfile is not used. #' @param tz The timezone used by the instrument (defaults to user timezone). 
#' @examples #' \dontrun{ #' library(wearables) #' read_nowatch("yourpathtohezipfile.zip") +#' read_nowatch(folder = "/path/to/folder/with/files") #' } #' @export #' @import cli -read_nowatch <- function(zipfile, +read_nowatch <- function(zipfile = NULL, + folder = NULL, tz = Sys.timezone()) { - # Check if file exists - if (!file.exists(zipfile)) { + # Check if zipfile or folder is provided + if (is.null(zipfile) && is.null(folder)) { + cli_abort("Please provide a zipfile or a folder") + } + + # Check if zipfile exists + if (!is.null(zipfile) && !file.exists(zipfile)) { cli_abort("File does not exist") } - csv_files <- unzip_files(zipfile, "csv") + # Check if folder exists + if (!is.null(folder) && !dir.exists(folder)) { + cli_abort("Folder does not exist") + } + + # Unzip the files + if (!is.null(zipfile)) { + csv_files <- unzip_files(zipfile, "csv") + } + + if (!is.null(folder)) { + csv_files <- list.files(folder, pattern = "[.]csv$", full.names = TRUE) + } # Get the content before .csv and after the last _ (but include -) dataset_names <- gsub(".*?([A-Za-z0-9\\-]+)[.]csv", "\\1", csv_files) diff --git a/man/read_nowatch.Rd b/man/read_nowatch.Rd index 2b9cb4b..92e553d 100644 --- a/man/read_nowatch.Rd +++ b/man/read_nowatch.Rd @@ -4,19 +4,21 @@ \alias{read_nowatch} \title{Read Nowatch data} \usage{ -read_nowatch(zipfile, tz = Sys.timezone()) +read_nowatch(zipfile = NULL, folder = NULL, tz = Sys.timezone()) } \arguments{ \item{zipfile}{A zip file as exported by the instrument. Only aggregated data supported.} +\item{folder}{A folder with the unzipped files. If this is provided, the zipfile is not used.} + \item{tz}{The timezone used by the instrument (defaults to user timezone).} } \description{ Reads in Nowatch data as a list, and prepends timecolumns } \details{ -This function reads in a zipfile as exported by Nowatch. Then it extracts the zipfiles in a temporary folder -and unzips them in the same temporary folder. 
+This function reads in a zipfile with files exported by the Nowatch instrument, +or a folder with the unzipped files. The files are expected to be csv files. The unzipped files are csv files. @@ -27,5 +29,6 @@ The object contains a list with dataframes from the physiological signals. \dontrun{ library(wearables) read_nowatch("yourpathtohezipfile.zip") + read_nowatch(folder = "/path/to/folder/with/files") } } From 02a977928b0f5a3197d23af463c116266d067f08 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 18 Apr 2024 15:10:05 +0200 Subject: [PATCH 16/19] rename more cols --- R/read_embrace_plus.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index f7dcc60..1f60335 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -193,7 +193,9 @@ read_aggregated_embrace_plus <- function(zipfile = NULL, folder = NULL, tz) { rename_cols <- list(c("timestamp_iso", "DateTime"), c("timestamp_unix", "unix_timestamp"), - c("eda_scl_usiemens", "EDA")) + c("eda_scl_usiemens", "EDA"), + c("temperature_celsius", "TEMP"), + c("pulse_rate_bpm", "HR")) for (j in rename_cols) { if (j[[1]] %in% colnames(this_file)) { From 5b7e63bf2a679edca592a1037f4bb08fa02043fd Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Thu, 18 Apr 2024 15:10:17 +0200 Subject: [PATCH 17/19] TBD: remove NAs --- R/read_and_process_embrace_plus.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/read_and_process_embrace_plus.R b/R/read_and_process_embrace_plus.R index 5bd5e10..374780a 100644 --- a/R/read_and_process_embrace_plus.R +++ b/R/read_and_process_embrace_plus.R @@ -23,7 +23,11 @@ read_and_process_embrace_plus <- function(zipfile, tz = Sys.timezone()) { #' @param data object from read_e4 function process_embrace_plus <- function(data) { - eda_filt <- process_eda(data$EDA) + # omitting NAs: TBD + # the Embrace Plus aggregated files have a lot of NAs, + # for example when the device wasn't able to 
record anything + # we need to decide how to handle these NAs + eda_filt <- process_eda(na.omit(data$EDA)) flog.info("EDA data filtered.") eda_peaks <- find_peaks(eda_filt) From 5d6af8a8d6f6494f660c30ad99c86c98659d63bd Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Wed, 24 Apr 2024 15:51:43 +0200 Subject: [PATCH 18/19] update `read_and_process_embrace_plus` --- R/read_and_process_embrace_plus.R | 24 +++++++++++++++++++++--- R/read_embrace_plus.R | 2 +- man/read_and_process_embrace_plus.Rd | 11 ++++++++++- man/read_embrace_plus.Rd | 2 +- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/R/read_and_process_embrace_plus.R b/R/read_and_process_embrace_plus.R index 374780a..b9db8fb 100644 --- a/R/read_and_process_embrace_plus.R +++ b/R/read_and_process_embrace_plus.R @@ -2,13 +2,31 @@ #' @description Reads the raw ZIP file using `read_embrace_plus`, #' performs analyses with `eda_analysis`. #' @param zipfile zip file with embrace plus data to be read +#' @param folder A folder with the unzipped files. If this is provided, the zipfile is not used. +#' @param type The type of data contained in the zip file. Either "raw" or "aggregated". #' @param tz timezone where data were recorded (default system timezone) #' @return An object with processed data and analyses, object of class 'embrace_plus_analysis'. 
#' @rdname read_and_process_embrace_plus #' @export -read_and_process_embrace_plus <- function(zipfile, tz = Sys.timezone()) { - data <- read_embrace_plus(zipfile, tz) - data <- rbind_embrace_plus(data) +read_and_process_embrace_plus <- function(zipfile = NULL, folder = NULL, type = "raw", tz = Sys.timezone()) { + + # Check if zipfile or folder is provided + if (is.null(zipfile) && is.null(folder)) { + cli_abort("Either zipfile or folder must be provided") + } + + if (!is.null(zipfile) && !is.null(folder)) { + cli_warn("Only folder will be processed, zipfile will be ignored") + } + + if (!is.null(zipfile) && is.null(folder)) { + data <- read_embrace_plus(zipfile = zipfile, type = type, tz = tz) + data <- rbind_embrace_plus(data) + } + + if (!is.null(folder)) { + data <- read_embrace_plus(folder = folder, type = type, tz = tz) + } if (is.null(data)) { return(NULL) diff --git a/R/read_embrace_plus.R b/R/read_embrace_plus.R index 1f60335..e1f0b5e 100644 --- a/R/read_embrace_plus.R +++ b/R/read_embrace_plus.R @@ -101,7 +101,7 @@ create_dataframes <- function(data, type, file, vars = c("x", "y", "z"), #' #' @param zipfile A zip file as exported by the instrument. Can be aggregated data, or raw data. #' @param folder A folder with the unzipped files. If this is provided, the zipfile is not used. -#' @param type The type of data contained in the zip file. Either "raw" or "aggregated". +#' @param type The type of data contained in the zip file or folder. Either "raw" or "aggregated". #' @param tz The timezone used by the instrument (defaults to user timezone). 
#' @examples #' \dontrun{ diff --git a/man/read_and_process_embrace_plus.Rd b/man/read_and_process_embrace_plus.Rd index 2fdd367..52b6159 100644 --- a/man/read_and_process_embrace_plus.Rd +++ b/man/read_and_process_embrace_plus.Rd @@ -4,11 +4,20 @@ \alias{read_and_process_embrace_plus} \title{Read, process and feature extraction of Embrace Plus data} \usage{ -read_and_process_embrace_plus(zipfile, tz = Sys.timezone()) +read_and_process_embrace_plus( + zipfile = NULL, + folder = NULL, + type = "raw", + tz = Sys.timezone() +) } \arguments{ \item{zipfile}{zip file with embrace plus data to be read} +\item{folder}{A folder with the unzipped files. If this is provided, the zipfile is not used.} + +\item{type}{The type of data contained in the zip file. Either "raw" or "aggregated".} + \item{tz}{timezone where data were recorded (default system timezone)} } \value{ diff --git a/man/read_embrace_plus.Rd b/man/read_embrace_plus.Rd index c88599f..66ef607 100644 --- a/man/read_embrace_plus.Rd +++ b/man/read_embrace_plus.Rd @@ -16,7 +16,7 @@ read_embrace_plus( \item{folder}{A folder with the unzipped files. If this is provided, the zipfile is not used.} -\item{type}{The type of data contained in the zip file. Either "raw" or "aggregated".} +\item{type}{The type of data contained in the zip file or folder. 
Either "raw" or "aggregated".} \item{tz}{The timezone used by the instrument (defaults to user timezone).} } From 24ea35d4f763a1567a02fa6208f02bbe0803f956 Mon Sep 17 00:00:00 2001 From: Veerle van Leemput Date: Wed, 24 Apr 2024 16:06:33 +0200 Subject: [PATCH 19/19] use MOVE column for ACC --- R/read_and_process_embrace_plus.R | 37 ++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/R/read_and_process_embrace_plus.R b/R/read_and_process_embrace_plus.R index b9db8fb..554aeb4 100644 --- a/R/read_and_process_embrace_plus.R +++ b/R/read_and_process_embrace_plus.R @@ -31,7 +31,7 @@ read_and_process_embrace_plus <- function(zipfile = NULL, folder = NULL, type = if (is.null(data)) { return(NULL) } else { - flog.info("Raw data read and converted.") + flog.info(sprintf("%s data read and converted.", type)) process_embrace_plus(data) } } @@ -75,13 +75,34 @@ process_embrace_plus <- function(data) { TEMP_sd = sd(data$TEMP$TEMP) ) - acc_summary <- list( - ACC_mean = mean(data$ACC$a), - ACC_median = median(data$ACC$a), - ACC_min = min(data$ACC$a), - ACC_max = max(data$ACC$a), - ACC_sd = sd(data$ACC$a) - ) + # Determine if ACC is in the data, if not, look for MOVE, if not, set to NA + if ("ACC" %in% names(data)) { + acc_data <- "ACC" + acc_col <- "a" + } else if ("MOVE" %in% names(data)) { + acc_data <- "MOVE" + acc_col <- "accelerometers_std_g" + } else { + acc_data <- "" + } + + if(acc_data != "") { + acc_summary <- list( + ACC_mean = mean(data[[acc_data]][[acc_col]]), + ACC_median = median(data[[acc_data]][[acc_col]]), + ACC_min = min(data[[acc_data]][[acc_col]]), + ACC_max = max(data[[acc_data]][[acc_col]]), + ACC_sd = sd(data[[acc_data]][[acc_col]]) + ) + } else { + acc_summary <- list( + ACC_mean = NA, + ACC_median = NA, + ACC_min = NA, + ACC_max = NA, + ACC_sd = NA + ) + } eda_clean <- dplyr::filter(eda_filt, .data$quality_flag == 1)