Merge pull request #3 from tscnlab/gg_days

Gg days
tscnlab · Nov 2, 2023 · 5d50323 · 5d50323
2 parents 0c0f024 + 4aaec99
commit 5d50323
Show file tree

Hide file tree

Showing 26 changed files with 823 additions and 73 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,18 +4,23 @@ export("%>%")
 export(Brown.check)
 export(Brown.rec)
 export(Brown2reference)
+export(Datetime_breaks)
+export(Datetime_limits)
 export(aggregate_Datetime)
+export(count.difftime)
 export(create_Time.data)
 export(cut_Datetime)
 export(data2reference)
 export(dominant_epoch)
 export(filter_Date)
 export(filter_Datetime)
+export(filter_Datetime_multiple)
 export(filter_Time)
 export(gap_finder)
 export(gap_handler)
 export(gapless_Datetimes)
 export(gg_day)
+export(gg_days)
 export(gg_overview)
 export(import)
 export(import.Dataset)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,11 @@
 # LightLogR 0.2.1.9000
 
+* Exports the up to now internal function `count.difftime()` that is the basis for `dominant_epoch()`. But whereas the latter gets only the most common epoch, `count.difftime()` returns a table with the counts of all epochs. This is useful in conjunction with `gap_finder()`, to check the distribution of data intervals.
+
+* Added the `gg_days()` function to visualize multiple days of data in a single plot. Alongside come two helper functions, `Datetime_limits()` and `Datetime_breaks()`, to set the limits and breaks of the x-axis. 
+
+* Added the `filter_Datetime_multiple()` function to filter for multiple Datetime ranges depending on certain conditions, e.g. different filter cutoffs for different participants. It wraps around `filter_Datetime()` or `filter_Date()`.
+
 * Reworked the internals of the light logger data import functions. They now use a more straightforward function factory approach. For users the only visible change it that device specific functions now have the form `import$device()` instead of the old `import.device()`.
 
 * Added the `symlog_trans()` function from a [post on stack overflow](https://stackoverflow.com/a/14674703). This function leads to a better visualization of light logger data, as a logarithmic transformation is necessary, but values of 0 are common. The function was integrated as a default for `gg_day()` and will likely be the basis of upcoming visualization functions.

diff --git a/R/aaa.r b/R/aaa.r
@@ -1,4 +1,4 @@
-Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- NULL
+Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- silent <-  NULL
 
 empty_function <- function() {
   rsconnect::accountInfo()

diff --git a/R/filter_Datetime.R b/R/filter_Datetime.R
@@ -32,6 +32,7 @@
 #'   is FALSE). This is useful, e.g., when the first observation in the dataset
 #'   is slightly after midnight. If TRUE, it will count the length from midnight
 #'   on to avoid empty days in plotting with [gg_day()].
+#' @param only_Id An expression of `ids` where the filtering should be applied to. If `NULL` (the default), the filtering will be applied to all `ids`. Based on the this expression, the dataset will be split in two and only where the given expression evaluates to `TRUE`, will the filtering take place. Afterwards both sets are recombined and sorted by `Datetime`.
 #'
 #' @return a `data.frame` object identical to `dataset` but with only the
 #'   specified Dates/Times.
@@ -86,12 +87,15 @@ filter_Datetime <- function(dataset,
                             length = NULL,
                             full.day = FALSE,
                             tz = NULL,
+                            only_Id = NULL,
                             filter.expr = NULL) {
 
   # Initial Checks ----------------------------------------------------------
 
   filter.expr <- rlang::enexpr(filter.expr)
 
+  only_Id <- rlang::enexpr(only_Id)
+
   Datetime.colname.defused <- 
     rlang::enexpr(Datetime.colname) %>% rlang::as_string()
   #timezone
@@ -119,6 +123,16 @@ filter_Datetime <- function(dataset,
 
   # Manipulation ----------------------------------------------------------
 
+  #split the dataset in two parts, based on the only_Id expression
+  if(!is.null(only_Id)) {
+    dataset_unfiltered <-
+      dataset %>% 
+      dplyr::filter(!(!!only_Id), .preserve = TRUE)
+    dataset <- 
+      dataset %>% 
+      dplyr::filter(!!only_Id, .preserve = TRUE)
+  }
+
   #calculate starting time if length and end are given
   if(is.null(start) & !is.null(length) & !is.null(end)) {
     start <- lubridate::as_datetime(end, tz = tz) - length
@@ -155,7 +169,10 @@ filter_Datetime <- function(dataset,
     }
 
   # Return --------------------------------------------------------------
-  dataset
+    if(!is.null(only_Id)) {
+      dplyr::bind_rows(dataset, dataset_unfiltered) %>% 
+        dplyr::arrange({{ Datetime.colname }}, .by_group = TRUE)
+    } else dataset
 }
 
 
@@ -184,4 +201,44 @@ filter_Date <- function(...,
   filter_Datetime(...,
                   start = start,
                   end = end)
+}
+
+# multiple filter_Date -------------------------------------------------------------
+
+#' Filter multiple times based on a list of arguments.
+#'
+#' [filter_Datetime_multiple()] is a wrapper around [filter_Datetime()] or
+#' [filter_Date()] that allows the cumulative filtering of `Datetimes` based on
+#' varying filter conditions. It is most useful in conjunction with the
+#' `only_Id` argument, e.g., to selectively cut off dates depending on
+#' participants (see examples)
+#'
+#' @param dataset A light logger dataset
+#' @param arguments A list of arguments to be passed to [filter_Datetime()] or
+#'   [filter_Date()]. each list entry must itself be a list of arguments, e.g,
+#'   `list(start = "2021-01-01", only_Id = quote(Id == 216))`. Expressions have
+#'   to be quoted with [quote()] or [rlang::expr()].
+#' @param filter_function The function to be used for filtering, either
+#'   `filter_Datetime` (the default) or `filter_Date`
+#'
+#' @return A dataframe with the filtered data
+#' @export
+#'
+#' @examples
+#' arguments <- list(
+#'  list(start = "2023-08-17", only_Id = quote(Source == "Participant")),
+#'  list(end = "2023-08-17", only_Id = quote(Source == "Environment")))
+#'  #compare the unfiltered dataset
+#'  sample.data.environment %>% gg_overview(Id.colname = Source)
+#'  #compare the unfiltered dataset
+#'  sample.data.environment %>% 
+#'  filter_Datetime_multiple(arguments = arguments, filter_Date) %>%
+#'  gg_overview(Id.colname = Source)
+filter_Datetime_multiple <- function(dataset, 
+                                     arguments, 
+                                     filter_function = filter_Datetime) {
+
+  purrr::reduce(arguments, function(dataset, params) {
+    do.call({{ filter_function }}, c(list(dataset = dataset), params))
+  }, .init = dataset)
 }
diff --git a/R/gg_day.r b/R/gg_day.r
@@ -1,18 +1,22 @@
 
-#' Create a simple plot of light logger data, facetted by Day
+#' Create a simple Time-of-Day plot of light logger data, faceted by Date
 #'
-#' `gg_day` will create a simple ggplot for every data in a dataset. The result
-#' can further be manipulated like any ggplot. This will be sensible to refine
-#' styling or guides.
+#' [gg_day()] will create a simple ggplot for every data in a dataset. The
+#' result can further be manipulated like any ggplot. This will be sensible to
+#' refine styling or guides.
 #'
 #' Besides plotting, the function creates two new variables from the given
 #' `Datetime`:
 #' * `Day.data` is a factor that is used for facetting with [ggplot2::facet_wrap()]. Make sure to use this variable, if you change the faceting manually. Also, the function checks, whether this variable already exists. If it does, it will only convert it to a factor and do the faceting on that variable.
 #' * `Time.data` is an `hms` created with [hms::as_hms()] that is used for the x.axis
 #'
 #' The default scaling of the y-axis is a `symlog` scale, which is a logarithmic
-#' scale that only starts scaling after a given threshold (default = 0). This enables values of 0 in the plot, which are common in light logger data, and even enables negative values, which might be sensible for non-light data. See [symlog_trans()] for details on tweaking this scale. The scale can also be changed to a normal or logarithmic scale - see the y.scale argument for more.
-#' 
+#' scale that only starts scaling after a given threshold (default = 0). This
+#' enables values of 0 in the plot, which are common in light logger data, and
+#' even enables negative values, which might be sensible for non-light data. See
+#' [symlog_trans()] for details on tweaking this scale. The scale can also be
+#' changed to a normal or logarithmic scale - see the y.scale argument for more.
+#'
 #' @param dataset A light logger dataset. Expects a `dataframe`. If not imported
 #'   by [LightLogR], take care to choose a sensible variable for the `x.axis.`.
 #' @param x.axis,y.axis column name that contains the datetime (x, defaults to
@@ -31,38 +35,43 @@
 #' @param start.date,end.date Choose an optional start or end date within your
 #'   `dataset`. Expects a `date`, which can also be a `character` that is
 #'   interpretable as a date, e.g., `"2023-06-03"`. If you need a Datetime or
-#'   want to cut specific times of each day, use the [filter_Datetime] function.
-#'   Defaults to `NULL`, which means that the plot starts/ends with the
-#'   earliest/latest date within the `dataset`.
+#'   want to cut specific times of each day, use the [filter_Datetime()]
+#'   function. Defaults to `NULL`, which means that the plot starts/ends with
+#'   the earliest/latest date within the `dataset`.
 #' @param scales For [ggplot2::facet_wrap()], should scales be "fixed", "free"
 #'   or free in one dimension ("free_y" is the default). Expects a `character`.
-#' @param y.scale How should the y-axis be scaled? 
-#' * Defaults to `"symlog"`, which is a logarithmic scale that can also handle negative values. 
+#' @param y.scale How should the y-axis be scaled?
+#' * Defaults to `"symlog"`, which is a logarithmic scale that can also handle negative values.
 #' * `"log10"` would be a straight logarithmic scale, but cannot handle negative values.
 #' * `"identity"` does nothing (continuous scaling).
 #' * a transforming function, such as [symlog_trans()] or [scales::identity_trans()], which allow for more control.
-#' @param col optional column name that defines separate sets and colors them.
-#'   Expects anything that works with the layer data [ggplot2::aes()]. The
-#'   default color palette can be overwritten outside the function (see
-#'   examples).
+#' @param aes_col,aes_fill optional arguments that define separate sets and
+#'   colors or fills them. Expects anything that works with the layer data
+#'   [ggplot2::aes()]. The default color palette can be overwritten outside the
+#'   function (see examples).
 #' @param x.axis.breaks,y.axis.breaks Where should breaks occur on the x and
 #'   y.axis? Expects a `numeric vector` with all the breaks. If you want to
 #'   activate the default behaviour of [ggplot2], you need to put in
 #'   [ggplot2::waiver()].
 #' @param y.scale.sc `logical` for whether scientific notation shall be used.
 #'   Defaults to `FALSE`.
 #' @param geom What geom should be used for visualization? Expects a `character`
-#' * `"point"` for [ggplot2::geom_point()] (the default)
+#' * `"point"` for [ggplot2::geom_point()]
 #' * `"line"`  for [ggplot2::geom_line()]
-#' * as the value is just input into the `geom_` function from [ggplot2], other variants might work as well, but are not tested.
+#' * `"ribbon"` for [ggplot2::geom_ribbon()]
+#' * as the value is just input into the `geom_` function from [ggplot2], other variants work as well, but are not extensively tested.
 #' @param group Optional column name that defines separate sets. Useful for
 #'   certain geoms like `boxplot`.Expects anything that works with the layer
 #'   data [ggplot2::aes()]
 #' @param ... Other options that get passed to the main geom function. Can be
-#'   used to adjust to adjust size or linetype.
+#'   used to adjust to adjust size, linewidth, or linetype.
 #' @param interactive Should the plot be interactive? Expects a `logical`.
 #'   Defaults to `FALSE`.
-#' @param facetting Should an automated facet by day be applie? Default is `TRUE` and uses the `Day.data` variable that the function also creates if not present.
+#' @param facetting Should an automated facet by day be applied? Default is
+#'   `TRUE` and uses the `Day.data` variable that the function also creates if
+#'   not present.
+#' @param jco_color Should the [ggsci::scale_color_jco()] color palette be used?
+#'   Defaults to `TRUE`.
 #'
 #' @return A ggplot object
 #' @export
@@ -76,7 +85,7 @@
 #' x.axis = Datetime,
 #' y.axis = `MELANOPIC EDI`,
 #' y.axis.label = "mEDI (lx)",
-#' col = Source)
+#' aes_col = Source)
 #' plot
 #'
 #' #you can easily overwrite the color scale afterwards
@@ -90,7 +99,8 @@ gg_day <- function(dataset,
                    end.date = NULL,
                    x.axis = Datetime,
                    y.axis = MEDI,
-                   col = NULL,
+                   aes_col = NULL,
+                   aes_fill = NULL,
                    group = NULL,
                    geom = "point",
                    scales = "fixed",
@@ -105,21 +115,20 @@ gg_day <- function(dataset,
                    subtitle = NULL,
                    interactive = FALSE,
                    facetting = TRUE,
+                   jco_color = TRUE,
                    ...) {
 
 # Initial Checks ----------------------------------------------------------
 
-  x <- rlang::enexpr(x.axis)
+  x <- rlang::enexpr(x.axis) 
   y <- rlang::enexpr(y.axis)
-  axis_columns <- (purrr::map_chr(c(x,y), rlang::as_string))
+  axis_columns <- (purrr::map_chr(c(x,y), deparse1))
   stopifnot(
     "The given dataset is not a dataframe" = is.data.frame(dataset),
     "The given column for X is not in the Dataset. If you did not specify X, you are working with data not originating from LightLogR. Please specify an appropriate Datetime column" = 
       rlang::as_string(x) %in% names(dataset),
     "The given column for X is not a Datetime" =
       lubridate::is.POSIXct(dataset[[rlang::as_string(x)]]),
-    "The given column for Y is not in the Dataset" = 
-      rlang::as_string(y) %in% names(dataset),
     "scales must be one of `fixed`, `free_x`, `free_y`, or `free`" = 
       scales %in% c("free_y", "free_x", "fixed", "free"),
     "format.day must be a character. Please make shure it is of type `base::strptime`" = 
@@ -139,14 +148,26 @@ gg_day <- function(dataset,
     ribbon <- 
       list(
         ggplot2::geom_ribbon(
-          ggplot2::aes(ymin = 0, ymax = !!y),
+          ggplot2::aes(ymin = 0, ymax = !!y,
+                       group = {{ group }},
+                       col = {{ aes_col }},
+                       fill = {{ aes_fill }}),
           outline.type = "upper",
           ...
           )
       )
 
   }
 
+  #jco color palette
+  jco_color_scheme <- list()
+  if(jco_color) {
+    jco_color_scheme <- 
+      list(
+        ggsci::scale_color_jco()
+      )
+  }
+
   #filter by start and end date
   if(!is.null(start.date)) {
     dataset <-
@@ -190,11 +211,12 @@ gg_day <- function(dataset,
     eval(geom_function_expr)(
       ggplot2::aes(
         group = {{ group }},
-        col = {{ col }},
+        col = {{ aes_col }},
+        fill = {{ aes_fill }},
       ), ...) +
     ribbon +
     # Scales --------------------------------------------------------------
-    ggsci::scale_color_jco()+
+    jco_color_scheme+
     ggplot2::scale_x_time(breaks = x.axis.breaks, 
                           labels = scales::label_time(format = "%H:%M")) + 
     ggplot2::scale_y_continuous(