Merge pull request #2 from tscnlab/rework_import_function_creation
Rework import function creation
JZauner authored Oct 31, 2023
2 parents 2244b40 + ffb4ba3 commit 0c0f024
Showing 24 changed files with 318 additions and 253 deletions.
3 changes: 1 addition & 2 deletions NAMESPACE
@@ -17,9 +17,8 @@ export(gap_handler)
export(gapless_Datetimes)
export(gg_day)
export(gg_overview)
export(import.ActLumus)
export(import)
export(import.Dataset)
export(import.LYS)
export(import.Statechanges)
export(interval2state)
export(join.datasets)
2 changes: 2 additions & 0 deletions NEWS.md
@@ -1,5 +1,7 @@
# LightLogR 0.2.1.9000

* Reworked the internals of the light logger data import functions. They now use a more straightforward function factory approach. For users, the only visible change is that device-specific functions now have the form `import$device()` instead of the old `import.device()`.

* Added the `symlog_trans()` function from a [post on stack overflow](https://stackoverflow.com/a/14674703). This function enables better visualization of light logger data, where a logarithmic transformation is needed but values of 0 (undefined on a log scale) are common. It was integrated as a default for `gg_day()` and will likely be the basis of upcoming visualization functions.

* Added the `aggregate_Datetime()` function to aggregate data to a given time interval.
2 changes: 1 addition & 1 deletion R/aaa.r
@@ -1,4 +1,4 @@
Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- NULL
Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- NULL

empty_function <- function() {
rsconnect::accountInfo()
5 changes: 5 additions & 0 deletions R/data.r
@@ -22,6 +22,11 @@

#' A vector of all supported devices for import functions
#'
#' These are all supported devices for which there is a dedicated import function.
#' Import functions can be called either through [import.Dataset()] with the
#' respective `device = "device"` argument, or directly, e.g.,
#' `import$ActLumus()`.
#'
#' @format `supported.devices` A character vector, listing all supported devices
#' \describe{
#'   \item{supported.devices}{strings}
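
Both entry points described in `R/data.r` resolve to the same device-specific importer. A short usage sketch with the packaged sample file (paths as used in the examples further below):

```r
library(LightLogR)

filepath <- system.file("extdata/sample_data_ActLumus.txt", package = "LightLogR")

# Equivalent calls: the general wrapper dispatches on the device string,
# while import$ActLumus() is the device-specific function called directly.
data1 <- import.Dataset("ActLumus", filepath)
data2 <- import$ActLumus(filepath)
```
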
287 changes: 191 additions & 96 deletions R/import_LL.R
@@ -2,40 +2,52 @@

#' Import a light logger dataset or related data
#'
#' @description
#'
#' Imports a dataset and does the necessary transformations to get the right
#' column formats. Unless specified otherwise, the function will set the
#' timezone of the data to `UTC`. It will also enforce an `id` to separate
#' different datasets and will order/arrange the dataset within each `id`.
#'
#' If the `Id` column is already part of the `dataset`, it will just use this
#' column. If the column is not present, it will add this column and fill it
#' with the file name of the import file (see param `auto.id`).
#' There are device-specific import functions and a general import function.
#' The general import function is described below; it is a thin wrapper around
#' the specific import functions, which take the form `import$device()`. The
#' specific import functions take the following arguments:
#'
#' * `filename`: Filename(s) for the Dataset. Can also contain the filepath,
#' but `path` must then be `NULL`. Expects a `character`. If the vector is
#' longer than `1`, multiple files will be read into one Tibble.
#' * `path`: Optional path for the dataset(s). `NULL` is the default. Expects
#' a `character`.
#' * `n_max`: maximum number of lines to read. Default is `Inf`.
#' * `tz`: Timezone of the data. `"UTC"` is the default. Expects a
#' `character`. You can look up the supported timezones with [OlsonNames()].
#' * `ID.colname`: Lets you specify a column for the participant id. Expects a
#' symbol (Default is `Id`). This column will be used for grouping
#' ([dplyr::group_by()]).
#' * `auto.id`: If the `ID.colname` column is added to the `dataset`, the `Id`
#' can be automatically extracted from the filename. The argument expects a
#' regular expression [regex] and will by default use the whole filename
#' without the file extension.
#' * `manual.id`: If this argument is not `NULL`, and no `Id` column is part
#' of the `dataset`, this `character` scalar will be used. **Don't use this
#' argument if multiple files from different participants are used!**
#' * `locale`: The locale controls defaults that vary from place to place.
#' * `...`: supply additional arguments to the [readr] import functions, like `na`. Might also be used to supply arguments to the specific import functions, like `column_names` for `Actiwatch_Spectrum` devices. Those devices will always throw a helpful error message if you forget to supply the necessary arguments.
#'
#' @details If the `Id` column is already part of the `dataset`, it will just use
#'   this column. If the column is not present, it will add this column and fill
#'   it with the file name of the import file (see param `auto.id`).
#'
#' @param filename Filename(s) for the Dataset. Can also contain the filepath,
#' but `path` must then be `NULL`. Expects a `character`. If the vector is
#'   longer than `1`, multiple files will be read into one Tibble.
#' @param path Optional path for the dataset(s). `NULL` is the default. Expects
#' a `character`.
#' @param n_max maximum number of lines to read. Default is `Inf`.
#' @param tz Timezone of the data. `"UTC"` is the default. Expects a
#' `character`. You can look up the supported timezones with [OlsonNames()].
#' @param ID.colname Lets you specify a column for the participant id. Expects a
#' symbol (Default is `Id`). This column will be used for grouping
#' ([dplyr::group_by()]).
#' @param auto.id If the `ID.colname` column is added to the `dataset`, the `Id`
#'   can be automatically extracted from the filename. The argument expects a
#'   regular expression [regex] and will by default use the whole filename
#'   without the file extension.
#' @param manual.id If this argument is not `NULL`, and no `Id` column is part
#'   of the `dataset`, this `character` scalar will be used.
#'   **Don't use this argument if multiple files from different participants are used!**
#' @param ... Parameters that get handed down to the specific import functions
#' @param device From what device do you want to import? For every supported
#' device, there is a sample data file that you can use to test the function
#' (see the examples). Currently the following devices are supported (followed
#' by the `device.ext` spec to access the sample file):
#' * `"ActLumus"` (ActLumus.txt)
#' * `"LYS"` (LYS.csv)
#' * `"Actiwatch_Spectrum"` (Actiwatch.csv) *Note: as the `locale` argument use `readr::locale(encoding="latin1")` . This is due to the fact that the German Actiwatch software from which this sample file was taken, uses a different encoding than UTF-8.*
#' @importFrom rlang :=
#' @return Tibble/Dataframe with a POSIXct column for the datetime
#' @export
@@ -68,12 +80,12 @@
#'
#' ```{r}
#' filepath <- system.file("extdata/sample_data_ActLumus.txt", package = "LightLogR")
#' dataset <- import.ActLumus(filepath)
#' dataset <- import$ActLumus(filepath)
#' ```
#'
#' ```{r}
#' dataset %>%
#' dplyr::select(Datetime, TEMPERATURE, LIGHT, MEDI) %>%
#' dplyr::select(Datetime, TEMPERATURE, LIGHT, MEDI, Id) %>%
#' dplyr::slice(1500:1505) %>%
#' flextable::flextable() %>%
#' flextable::autofit()
#' ```

@@ -87,89 +99,172 @@ import.Dataset <- function(device, ...) {
device %in% supported.devices
)

import_function_expr <- rlang::parse_expr(paste0("import.", device))
import_function_expr <- rlang::parse_expr(paste0("import$", device))

eval(import_function_expr)(...)
}
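
The wrapper above builds `import$<device>` as text, parses it with `rlang::parse_expr()`, and evaluates the result to obtain the device-specific function. A minimal standalone sketch of that dispatch pattern (the `handlers` list and `square` function are hypothetical, not package code):

```r
library(rlang)

# A named list of functions stands in for the package's `import` list.
handlers <- list(square = function(x) x^2)

device <- "square"
fn <- eval(parse_expr(paste0("handlers$", device)))
fn(4)
#> [1] 16
```
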

# ActLumus ----------------------------------------------------------------

#' Import Dataset from ActLumus
#'
#' @rdname import.Dataset
#' @export

import.ActLumus <-
function(filename,
path = NULL,
n_max = Inf,
tz = "UTC",
ID.colname = Id,
auto.id = ".*",
manual.id = NULL) {

if (!is.null(path)) {
filename <- file.path(path, filename)
}

#special handling for ActLumus files
import.expr <- rlang::expr(
{tmp <- readr::read_delim(!!filename,
skip = 32,
delim = ";",
n_max = !!n_max,
col_types = paste0("c",rep("d",32)),
id = "file.name"
# General ----------------------------------------------------------------
#This internal helper function is a function factory to create import functions
#based on device name and specific import expression
imports <- function(device,
import.expr) {

import.expr <- rlang::enexpr(import.expr)
ID.colname <- quote({{ ID.colname}})

rlang::new_function(
#function arguments
rlang::exprs(
filename =,
path = NULL,
n_max = Inf,
tz = "UTC",
ID.colname = Id,
auto.id = ".*",
manual.id = NULL,
locale = readr::default_locale(),
... =
),
#function expression
rlang::expr({

if (!is.null(path)) {
filename <- file.path(path, filename)
}

id.colname.defused <- colname.defused(!!ID.colname)
#initial checks
stopifnot(
"filename needs to be a character (vector)" = is.character(filename),
"device needs to be a character" = is.character(!!device),
"tz needs to be a character" = is.character(tz),
"tz needs to be a valid time zone, see `OlsonNames()`" = tz %in% OlsonNames(),
"auto.id needs to be a string" = is.character(auto.id),
"n_max needs to be a positive numeric" = is.numeric(n_max)
)
tmp <- tmp %>%
dplyr::rename(Datetime = `DATE/TIME`,
MEDI = `MELANOPIC EDI`) %>%
dplyr::mutate(Datetime =
Datetime %>% lubridate::dmy_hms(tz = !!tz))
#import the file
tmp <- rlang::eval_tidy(!!import.expr)

#validate/manipulate the file
if(!id.colname.defused %in% names(tmp)) {
switch(is.null(manual.id) %>% as.character(),
"TRUE" =
{tmp <- tmp %>%
dplyr::mutate(!!ID.colname :=
basename(file.name) %>%
tools::file_path_sans_ext() %>%
stringr::str_extract(auto.id),
.before = 1)},
"FALSE" =
{tmp <- tmp %>%
dplyr::mutate(!!ID.colname := manual.id, .before = 1)}
)
}
tmp <- tmp %>%
dplyr::mutate(file.name = basename(file.name) %>%
tools::file_path_sans_ext(),
!!ID.colname := factor(!!ID.colname)) %>%
dplyr::group_by(!!ID.colname) %>%
dplyr::arrange(Datetime, .by_group = TRUE)

#give info about the file
import.info(tmp, !!device, tz, !!ID.colname)

#return the file
tmp

}),
rlang::caller_env()
)
}
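
Within the generated function body, `auto.id` turns the file name (extension stripped) into the `Id` via `stringr::str_extract()`. A minimal illustration with a hypothetical pattern:

```r
# File name -> Id, mirroring the auto.id branch above; "P\\d{3}" is
# an illustrative pattern, not a package default.
basename("data/participant_P001_ActLumus.txt") |>
  tools::file_path_sans_ext() |>
  stringr::str_extract("P\\d{3}")
#> [1] "P001"
```
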

import_arguments <- list(
#ActLumus
ActLumus = rlang::expr({
tmp <- readr::read_delim(
filename,
skip = 32,
delim = ";",
n_max = n_max,
col_types = paste0("c", rep("d", 32)),
id = "file.name",
locale = locale,
...
)
tmp <- tmp %>%
dplyr::rename(Datetime = `DATE/TIME`,
MEDI = `MELANOPIC EDI`) %>%
dplyr::mutate(Datetime =
Datetime %>% lubridate::dmy_hms(tz = tz))
}),
#LYS
LYS = rlang::expr({
tmp <- readr::read_csv(filename,
n_max = n_max,
col_types = c("cfddddddddddd"),
id = "file.name",
locale = locale,
...
)

#generic import function
import.link("ActLumus", {{ ID.colname }})

}
tmp <- tmp %>%
dplyr::rename(Datetime = timestamp,
MEDI = mEDI) %>%
dplyr::mutate(Datetime =
Datetime %>% lubridate::dmy_hms(tz = tz))
}),
#Actiwatch Spectrum
Actiwatch_Spectrum = rlang::expr({
#separate the dots list in the column_names and the rest
dots <- rlang::list2(...)
column_names <- dots$column_names
if(is.null(column_names))
stop("Actiwatch Spectrum requires a vector of `column_names` in the order in which they appear in the file in order to properly detect the starting row")
dots$column_names <- NULL

tmp <-
purrr::map(
filename,
\(x) {
rows_to_skip <- detect_starting_row(x,
locale = locale,
column_names = column_names,
n_max = n_max)
df <- suppressMessages(do.call(
readr::read_csv,
append(list(
x,
skip = rows_to_skip,
locale=locale,
id = "file.name",
show_col_types = FALSE
),
dots)))

df %>%
dplyr::select(!dplyr::starts_with("..."))

}) %>% purrr::list_rbind()
tmp <- tmp %>%
tidyr::unite(col = "Datetime",
tidyselect::where(lubridate::is.Date),
tidyselect::where(hms::is_hms),
remove = FALSE
) %>%
dplyr::mutate(
Datetime = lubridate::ymd_hms(Datetime),
dplyr::across(
dplyr::where(is.character) &
dplyr::where(~ any(stringr::str_detect(.x, ","), na.rm = TRUE)),
~ stringr::str_replace(.x, ",", ".") %>%
as.numeric()
))
})

# LYS ---------------------------------------------------------------------
)
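
The `Actiwatch_Spectrum` expression above also converts German-style decimal commas to points before parsing numbers, matching the latin1 encoding note in the documentation. Conceptually:

```r
# Character columns containing "1,5"-style numbers become numeric,
# as in the dplyr::across() step of the Actiwatch_Spectrum branch.
stringr::str_replace(c("1,5", "0,25"), ",", ".") |> as.numeric()
#> [1] 1.50 0.25
```
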

#' Import Dataset from LYS Button
#' Import Datasets from supported devices
#'
#' @rdname import.Dataset
#' @export

import.LYS <- function(filename,
path = NULL,
n_max = Inf,
tz = "UTC",
ID.colname = Id,
auto.id = ".*",
manual.id = NULL) {

if (!is.null(path)) {
filename <- file.path(path, filename)
}

#special handling for LYS files
import.expr <- rlang::expr(
{tmp <- readr::read_csv(!!filename,
n_max = !!n_max,
col_types = c("cfddddddddddd"),
id = "file.name"
)
tmp <- tmp %>%
dplyr::rename(Datetime = timestamp,
MEDI = mEDI) %>%
dplyr::mutate(Datetime =
Datetime %>% lubridate::dmy_hms(tz = !!tz))
}
)

#generic import function
import.link("LYS", {{ ID.colname }})

}
import <- purrr::imap(import_arguments, \(x, idx) imports(idx,x))
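
The final line maps the factory over the named list of import expressions, so `import` becomes a list with one reader per device, and `import$ActLumus()` is simply an element of that list. A hedged standalone sketch of the same `rlang::new_function()` + `purrr::imap()` pattern (toy body, not the package's):

```r
library(rlang)

# new_function() assembles a function from an argument list, a body
# expression, and an environment; !!device bakes the name into the body.
make_reader <- function(device) {
  new_function(
    exprs(filename = ),
    expr(paste("Reading", !!device, "file:", filename)),
    caller_env()
  )
}

readers <- purrr::imap(list(ActLumus = NULL, LYS = NULL),
                       \(x, idx) make_reader(idx))
readers$ActLumus("sample.txt")
#> [1] "Reading ActLumus file: sample.txt"
```
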
8 changes: 7 additions & 1 deletion R/import_States.R
@@ -8,7 +8,13 @@
#' * In the `wide` format, multiple `Datetime` columns indicate the state through the column name. These get pivoted to the `long` format and can be recoded through the `State.encoding` argument.
#' * In the `long` format, one column indicates the `State`, while the other gives the `Datetime`.
#'
#' @inheritParams import.Dataset
#' @param filename Filename(s) for the Dataset. Can also contain the filepath,
#' but `path` must then be `NULL`. Expects a `character`. If the vector is
#'   longer than `1`, multiple files will be read into one Tibble.
#' @param path Optional path for the dataset(s). `NULL` is the default. Expects
#' a `character`.
#' @param tz Timezone of the data. `"UTC"` is the default. Expects a
#' `character`. You can look up the supported timezones with [OlsonNames()].
#' @param sep String that separates columns in the import file. Defaults to
#' `","`.
#' @param dec String that indicates a decimal separator in the import file.
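
For the `wide` format described above, state names are column headers holding the switch times; a hedged sketch of the reshaping idea (hypothetical column names):

```r
library(tidyr)

# Wide: one Datetime column per state, named by the state.
wide <- data.frame(Id    = "P1",
                   sleep = "2023-10-31 22:00:00",
                   wake  = "2023-11-01 07:00:00")

# Long: one State column and one Datetime column, as produced
# by the pivoting step described above.
pivot_longer(wide, c(sleep, wake),
             names_to = "State", values_to = "Datetime")
```
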