Skip to content

Commit

Permalink
Merge pull request #3 from tscnlab/gg_days
Browse files Browse the repository at this point in the history
Gg days
  • Loading branch information
JZauner authored Nov 2, 2023
2 parents 0c0f024 + 4aaec99 commit 5d50323
Show file tree
Hide file tree
Showing 26 changed files with 823 additions and 73 deletions.
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@ export("%>%")
export(Brown.check)
export(Brown.rec)
export(Brown2reference)
export(Datetime_breaks)
export(Datetime_limits)
export(aggregate_Datetime)
export(count.difftime)
export(create_Time.data)
export(cut_Datetime)
export(data2reference)
export(dominant_epoch)
export(filter_Date)
export(filter_Datetime)
export(filter_Datetime_multiple)
export(filter_Time)
export(gap_finder)
export(gap_handler)
export(gapless_Datetimes)
export(gg_day)
export(gg_days)
export(gg_overview)
export(import)
export(import.Dataset)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# LightLogR 0.2.1.9000

* Exports the up to now internal function `count.difftime()` that is the basis for `dominant_epoch()`. But whereas the latter gets only the most common epoch, `count.difftime()` returns a table with the counts of all epochs. This is useful in conjunction with `gap_finder()`, to check the distribution of data intervals.

* Added the `gg_days()` function to visualize multiple days of data in a single plot. Alongside come two helper functions, `Datetime_limits()` and `Datetime_breaks()`, to set the limits and breaks of the x-axis.

* Added the `filter_Datetime_multiple()` function to filter for multiple Datetime ranges depending on certain conditions, e.g. different filter cutoffs for different participants. It wraps around `filter_Datetime()` or `filter_Date()`.

* Reworked the internals of the light logger data import functions. They now use a more straightforward function factory approach. For users the only visible change it that device specific functions now have the form `import$device()` instead of the old `import.device()`.

* Added the `symlog_trans()` function from a [post on stack overflow](https://stackoverflow.com/a/14674703). This function leads to a better visualization of light logger data, as a logarithmic transformation is necessary, but values of 0 are common. The function was integrated as a default for `gg_day()` and will likely be the basis of upcoming visualization functions.
Expand Down
2 changes: 1 addition & 1 deletion R/aaa.r
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- NULL
Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- silent <- NULL

empty_function <- function() {
rsconnect::accountInfo()
Expand Down
59 changes: 58 additions & 1 deletion R/filter_Datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#' is FALSE). This is useful, e.g., when the first observation in the dataset
#' is slightly after midnight. If TRUE, it will count the length from midnight
#' on to avoid empty days in plotting with [gg_day()].
#' @param only_Id An expression of `ids` where the filtering should be applied to. If `NULL` (the default), the filtering will be applied to all `ids`. Based on the this expression, the dataset will be split in two and only where the given expression evaluates to `TRUE`, will the filtering take place. Afterwards both sets are recombined and sorted by `Datetime`.
#'
#' @return a `data.frame` object identical to `dataset` but with only the
#' specified Dates/Times.
Expand Down Expand Up @@ -86,12 +87,15 @@ filter_Datetime <- function(dataset,
length = NULL,
full.day = FALSE,
tz = NULL,
only_Id = NULL,
filter.expr = NULL) {

# Initial Checks ----------------------------------------------------------

filter.expr <- rlang::enexpr(filter.expr)

only_Id <- rlang::enexpr(only_Id)

Datetime.colname.defused <-
rlang::enexpr(Datetime.colname) %>% rlang::as_string()
#timezone
Expand Down Expand Up @@ -119,6 +123,16 @@ filter_Datetime <- function(dataset,

# Manipulation ----------------------------------------------------------

#split the dataset in two parts, based on the only_Id expression
if(!is.null(only_Id)) {
dataset_unfiltered <-
dataset %>%
dplyr::filter(!(!!only_Id), .preserve = TRUE)
dataset <-
dataset %>%
dplyr::filter(!!only_Id, .preserve = TRUE)
}

#calculate starting time if length and end are given
if(is.null(start) & !is.null(length) & !is.null(end)) {
start <- lubridate::as_datetime(end, tz = tz) - length
Expand Down Expand Up @@ -155,7 +169,10 @@ filter_Datetime <- function(dataset,
}

# Return --------------------------------------------------------------
dataset
if(!is.null(only_Id)) {
dplyr::bind_rows(dataset, dataset_unfiltered) %>%
dplyr::arrange({{ Datetime.colname }}, .by_group = TRUE)
} else dataset
}


Expand Down Expand Up @@ -184,4 +201,44 @@ filter_Date <- function(...,
filter_Datetime(...,
start = start,
end = end)
}

# multiple filter_Date -------------------------------------------------------------

#' Filter multiple times based on a list of arguments.
#'
#' [filter_Datetime_multiple()] is a wrapper around [filter_Datetime()] or
#' [filter_Date()] that allows the cumulative filtering of `Datetimes` based on
#' varying filter conditions. It is most useful in conjunction with the
#' `only_Id` argument, e.g., to selectively cut off dates depending on
#' participants (see examples)
#'
#' @param dataset A light logger dataset
#' @param arguments A list of arguments to be passed to [filter_Datetime()] or
#' [filter_Date()]. each list entry must itself be a list of arguments, e.g,
#' `list(start = "2021-01-01", only_Id = quote(Id == 216))`. Expressions have
#' to be quoted with [quote()] or [rlang::expr()].
#' @param filter_function The function to be used for filtering, either
#' `filter_Datetime` (the default) or `filter_Date`
#'
#' @return A dataframe with the filtered data
#' @export
#'
#' @examples
#' arguments <- list(
#' list(start = "2023-08-17", only_Id = quote(Source == "Participant")),
#' list(end = "2023-08-17", only_Id = quote(Source == "Environment")))
#' #compare the unfiltered dataset
#' sample.data.environment %>% gg_overview(Id.colname = Source)
#' #compare the unfiltered dataset
#' sample.data.environment %>%
#' filter_Datetime_multiple(arguments = arguments, filter_Date) %>%
#' gg_overview(Id.colname = Source)
filter_Datetime_multiple <- function(dataset,
arguments,
filter_function = filter_Datetime) {

purrr::reduce(arguments, function(dataset, params) {
do.call({{ filter_function }}, c(list(dataset = dataset), params))
}, .init = dataset)
}
78 changes: 50 additions & 28 deletions R/gg_day.r
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@

#' Create a simple plot of light logger data, facetted by Day
#' Create a simple Time-of-Day plot of light logger data, faceted by Date
#'
#' `gg_day` will create a simple ggplot for every data in a dataset. The result
#' can further be manipulated like any ggplot. This will be sensible to refine
#' styling or guides.
#' [gg_day()] will create a simple ggplot for every data in a dataset. The
#' result can further be manipulated like any ggplot. This will be sensible to
#' refine styling or guides.
#'
#' Besides plotting, the function creates two new variables from the given
#' `Datetime`:
#' * `Day.data` is a factor that is used for facetting with [ggplot2::facet_wrap()]. Make sure to use this variable, if you change the faceting manually. Also, the function checks, whether this variable already exists. If it does, it will only convert it to a factor and do the faceting on that variable.
#' * `Time.data` is an `hms` created with [hms::as_hms()] that is used for the x.axis
#'
#' The default scaling of the y-axis is a `symlog` scale, which is a logarithmic
#' scale that only starts scaling after a given threshold (default = 0). This enables values of 0 in the plot, which are common in light logger data, and even enables negative values, which might be sensible for non-light data. See [symlog_trans()] for details on tweaking this scale. The scale can also be changed to a normal or logarithmic scale - see the y.scale argument for more.
#'
#' scale that only starts scaling after a given threshold (default = 0). This
#' enables values of 0 in the plot, which are common in light logger data, and
#' even enables negative values, which might be sensible for non-light data. See
#' [symlog_trans()] for details on tweaking this scale. The scale can also be
#' changed to a normal or logarithmic scale - see the y.scale argument for more.
#'
#' @param dataset A light logger dataset. Expects a `dataframe`. If not imported
#' by [LightLogR], take care to choose a sensible variable for the `x.axis.`.
#' @param x.axis,y.axis column name that contains the datetime (x, defaults to
Expand All @@ -31,38 +35,43 @@
#' @param start.date,end.date Choose an optional start or end date within your
#' `dataset`. Expects a `date`, which can also be a `character` that is
#' interpretable as a date, e.g., `"2023-06-03"`. If you need a Datetime or
#' want to cut specific times of each day, use the [filter_Datetime] function.
#' Defaults to `NULL`, which means that the plot starts/ends with the
#' earliest/latest date within the `dataset`.
#' want to cut specific times of each day, use the [filter_Datetime()]
#' function. Defaults to `NULL`, which means that the plot starts/ends with
#' the earliest/latest date within the `dataset`.
#' @param scales For [ggplot2::facet_wrap()], should scales be "fixed", "free"
#' or free in one dimension ("free_y" is the default). Expects a `character`.
#' @param y.scale How should the y-axis be scaled?
#' * Defaults to `"symlog"`, which is a logarithmic scale that can also handle negative values.
#' @param y.scale How should the y-axis be scaled?
#' * Defaults to `"symlog"`, which is a logarithmic scale that can also handle negative values.
#' * `"log10"` would be a straight logarithmic scale, but cannot handle negative values.
#' * `"identity"` does nothing (continuous scaling).
#' * a transforming function, such as [symlog_trans()] or [scales::identity_trans()], which allow for more control.
#' @param col optional column name that defines separate sets and colors them.
#' Expects anything that works with the layer data [ggplot2::aes()]. The
#' default color palette can be overwritten outside the function (see
#' examples).
#' @param aes_col,aes_fill optional arguments that define separate sets and
#' colors or fills them. Expects anything that works with the layer data
#' [ggplot2::aes()]. The default color palette can be overwritten outside the
#' function (see examples).
#' @param x.axis.breaks,y.axis.breaks Where should breaks occur on the x and
#' y.axis? Expects a `numeric vector` with all the breaks. If you want to
#' activate the default behaviour of [ggplot2], you need to put in
#' [ggplot2::waiver()].
#' @param y.scale.sc `logical` for whether scientific notation shall be used.
#' Defaults to `FALSE`.
#' @param geom What geom should be used for visualization? Expects a `character`
#' * `"point"` for [ggplot2::geom_point()] (the default)
#' * `"point"` for [ggplot2::geom_point()]
#' * `"line"` for [ggplot2::geom_line()]
#' * as the value is just input into the `geom_` function from [ggplot2], other variants might work as well, but are not tested.
#' * `"ribbon"` for [ggplot2::geom_ribbon()]
#' * as the value is just input into the `geom_` function from [ggplot2], other variants work as well, but are not extensively tested.
#' @param group Optional column name that defines separate sets. Useful for
#' certain geoms like `boxplot`.Expects anything that works with the layer
#' data [ggplot2::aes()]
#' @param ... Other options that get passed to the main geom function. Can be
#' used to adjust to adjust size or linetype.
#' used to adjust to adjust size, linewidth, or linetype.
#' @param interactive Should the plot be interactive? Expects a `logical`.
#' Defaults to `FALSE`.
#' @param facetting Should an automated facet by day be applie? Default is `TRUE` and uses the `Day.data` variable that the function also creates if not present.
#' @param facetting Should an automated facet by day be applied? Default is
#' `TRUE` and uses the `Day.data` variable that the function also creates if
#' not present.
#' @param jco_color Should the [ggsci::scale_color_jco()] color palette be used?
#' Defaults to `TRUE`.
#'
#' @return A ggplot object
#' @export
Expand All @@ -76,7 +85,7 @@
#' x.axis = Datetime,
#' y.axis = `MELANOPIC EDI`,
#' y.axis.label = "mEDI (lx)",
#' col = Source)
#' aes_col = Source)
#' plot
#'
#' #you can easily overwrite the color scale afterwards
Expand All @@ -90,7 +99,8 @@ gg_day <- function(dataset,
end.date = NULL,
x.axis = Datetime,
y.axis = MEDI,
col = NULL,
aes_col = NULL,
aes_fill = NULL,
group = NULL,
geom = "point",
scales = "fixed",
Expand All @@ -105,21 +115,20 @@ gg_day <- function(dataset,
subtitle = NULL,
interactive = FALSE,
facetting = TRUE,
jco_color = TRUE,
...) {

# Initial Checks ----------------------------------------------------------

x <- rlang::enexpr(x.axis)
x <- rlang::enexpr(x.axis)
y <- rlang::enexpr(y.axis)
axis_columns <- (purrr::map_chr(c(x,y), rlang::as_string))
axis_columns <- (purrr::map_chr(c(x,y), deparse1))
stopifnot(
"The given dataset is not a dataframe" = is.data.frame(dataset),
"The given column for X is not in the Dataset. If you did not specify X, you are working with data not originating from LightLogR. Please specify an appropriate Datetime column" =
rlang::as_string(x) %in% names(dataset),
"The given column for X is not a Datetime" =
lubridate::is.POSIXct(dataset[[rlang::as_string(x)]]),
"The given column for Y is not in the Dataset" =
rlang::as_string(y) %in% names(dataset),
"scales must be one of `fixed`, `free_x`, `free_y`, or `free`" =
scales %in% c("free_y", "free_x", "fixed", "free"),
"format.day must be a character. Please make shure it is of type `base::strptime`" =
Expand All @@ -139,14 +148,26 @@ gg_day <- function(dataset,
ribbon <-
list(
ggplot2::geom_ribbon(
ggplot2::aes(ymin = 0, ymax = !!y),
ggplot2::aes(ymin = 0, ymax = !!y,
group = {{ group }},
col = {{ aes_col }},
fill = {{ aes_fill }}),
outline.type = "upper",
...
)
)

}

#jco color palette
jco_color_scheme <- list()
if(jco_color) {
jco_color_scheme <-
list(
ggsci::scale_color_jco()
)
}

#filter by start and end date
if(!is.null(start.date)) {
dataset <-
Expand Down Expand Up @@ -190,11 +211,12 @@ gg_day <- function(dataset,
eval(geom_function_expr)(
ggplot2::aes(
group = {{ group }},
col = {{ col }},
col = {{ aes_col }},
fill = {{ aes_fill }},
), ...) +
ribbon +
# Scales --------------------------------------------------------------
ggsci::scale_color_jco()+
jco_color_scheme+
ggplot2::scale_x_time(breaks = x.axis.breaks,
labels = scales::label_time(format = "%H:%M")) +
ggplot2::scale_y_continuous(
Expand Down
Loading

0 comments on commit 5d50323

Please sign in to comment.