Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Newly created variables in ... can be processed in .at or .if #480

Merged
merged 1 commit into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions R/data_modify.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@
#' - Using `NULL` as right-hand side removes a variable from the data frame.
#' Example: `Petal.Width = NULL`.
#'
#' Note that newly created variables can be used in subsequent expressions.
#' See also 'Examples'.
#' Note that newly created variables can be used in subsequent expressions,
#' including `.at` or `.if`. See also 'Examples'.
#'
#' @param .at A character vector of variable names that should be modified. This
#' argument is used in combination with the `.modify` argument. Note that only one
#' of `.at` or `.if` can be provided, but not both at the same time. Newly created
#' variables in `...` are not affected by `.at`.
#' variables in `...` can also be selected, see 'Examples'.
#' @param .if A function that returns `TRUE` for columns in the data frame where
#' `.if` applies. This argument is used in combination with the `.modify` argument.
#' Note that only one of `.at` or `.if` can be provided, but not both at the same
#' time. Newly created variables in `...` are not affected by `.if`.
#' time. Newly created variables in `...` can also be selected, see 'Examples'.
#' @param .modify A function that modifies the variables defined in `.at` or `.if`.
#' This argument is used in combination with either the `.at` or the `.if` argument.
#' Note that the modified variable (i.e. the result from `.modify`) must be either
Expand Down Expand Up @@ -113,15 +113,13 @@
#' # can be combined with dots
#' data_modify(d, new_length = Petal.Length * 2, .at = "Species", .modify = as.numeric)
#'
#' # note that new variables cannot be used in `.at` or `.if` arguments
#' # this example would throw an error
#' \dontrun{
#' # new variables used in `.at` or `.if`
#' data_modify(
#' d,
#' new_length = Petal.Length * 2,
#' .at = c("Species", "new_length"),
#' .modify = as.numeric
#' )}
#' .at = c("Petal.Length", "new_length"),
#' .modify = round
#' )
#'
#' # combine "data_find()" and ".at" argument
#' out <- data_modify(
Expand All @@ -146,7 +144,6 @@
#' @export
data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = NULL) {
dots <- eval(substitute(alist(...)))
column_names <- colnames(data)

# check if we have dots, or only at/modify ----

Expand All @@ -164,10 +161,10 @@
}
}
# expression is given as character string, e.g.
# a <- "double_SepWidth = 2 * Sepal.Width"

Check warning on line 164 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=164,col=9,[commented_code_linter] Remove commented code.
# data_modify(iris, a)

Check warning on line 165 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=165,col=9,[commented_code_linter] Remove commented code.
# or as character vector, e.g.
# data_modify(iris, c("var_a = Sepal.Width / 10", "var_b = Sepal.Width * 10"))

Check warning on line 167 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=167,col=9,[commented_code_linter] Remove commented code.
character_symbol <- tryCatch(.dynEval(dots[[1]]), error = function(e) NULL)
# do we have a character vector? Then we can proceed
if (is.character(character_symbol)) {
Expand All @@ -184,11 +181,11 @@
symbol <- dots[[i]]

# expression is given as character string in a variable, but named, e.g.
# a <- "2 * Sepal.Width"

Check warning on line 184 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=184,col=9,[commented_code_linter] Remove commented code.
# data_modify(iris, double_SepWidth = a)

Check warning on line 185 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=185,col=9,[commented_code_linter] Remove commented code.
# we reconstruct the symbol as if it were provided as literal expression.
# However, we need to check that we don't have a character vector,
# like: data_modify(iris, new_var = "a")

Check warning on line 188 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=188,col=9,[commented_code_linter] Remove commented code.
# this one should be recycled instead.
if (!is.character(symbol)) {
eval_symbol <- .dynEval(symbol, ifnotfound = NULL)
Expand Down Expand Up @@ -248,7 +245,7 @@
}

# check if we have at/modify ----
data <- .modify_at(data, .at, .if, .modify, column_names)
data <- .modify_at(data, .at, .if, .modify)

data
}
Expand All @@ -258,7 +255,6 @@
# we need to evaluate dots here, and pass them with "do.call" to
# the data.frame method later...
dots <- match.call(expand.dots = FALSE)[["..."]]
column_names <- colnames(data)

# works only for dplyr >= 0.8.0
grps <- attr(data, "groups", exact = TRUE)
Expand All @@ -282,10 +278,10 @@
}
}
# expression is given as character string, e.g.
# a <- "double_SepWidth = 2 * Sepal.Width"

Check warning on line 281 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=281,col=7,[commented_code_linter] Remove commented code.
# data_modify(iris, a)

Check warning on line 282 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=282,col=7,[commented_code_linter] Remove commented code.
# or as character vector, e.g.
# data_modify(iris, c("var_a = Sepal.Width / 10", "var_b = Sepal.Width * 10"))

Check warning on line 284 in R/data_modify.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_modify.R,line=284,col=7,[commented_code_linter] Remove commented code.
character_symbol <- tryCatch(.dynEval(dots[[1]]), error = function(e) NULL)
# do we have a character vector? Then we can proceed
if (is.character(character_symbol)) {
Expand Down Expand Up @@ -313,7 +309,7 @@
}

# check if we have at/modify ----
data <- .modify_at(data, .at, .if, .modify, column_names)
data <- .modify_at(data, .at, .if, .modify)

# set back attributes and class
data <- .replace_attrs(data, attr_data)
Expand All @@ -324,7 +320,7 @@

# helper -------------

.modify_at <- function(data, .at, .if, .modify, column_names) {
.modify_at <- function(data, .at, .if, .modify) {
# check if ".at" or ".if" is defined, but not ".modify"
if (is.null(.modify)) {
if (!is.null(.at) || !is.null(.if)) {
Expand All @@ -345,9 +341,11 @@
insight::format_error("You need to specify either `.at` or `.if`.")
}

column_names <- colnames(data)

# if we have ".if" defined, specify ".at"
if (!is.null(.if)) {
.at <- column_names[vapply(data[column_names], .if, logical(1))]
.at <- column_names[vapply(data, .if, logical(1))]
}
# check for valid defined column names
if (!all(.at %in% column_names)) {
Expand Down
18 changes: 8 additions & 10 deletions man/data_modify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 12 additions & 3 deletions tests/testthat/test-data_modify.R
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ test_that("data_modify works with functions that return character vectors", {
test_that("data_modify .if/.at arguments", {
data(iris)
d <- iris[1:5, ]
# validate results
out <- data_modify(d, .at = "Species", .modify = as.numeric)
expect_identical(out$Species, c(1, 1, 1, 1, 1))
out <- data_modify(d, .if = is.factor, .modify = as.numeric)
Expand All @@ -505,39 +506,47 @@ test_that("data_modify .if/.at arguments", {
"Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
"Species", "new_length"
))
# .at and .if cannot be used at same timne
expect_error(
data_modify(d, .at = "Species", .if = is.factor, .modify = as.numeric),
regex = "You cannot use both"
)
# modify must be a function
expect_error(
data_modify(d, .at = "Species", .modify = "a"),
regex = "`.modify` must"
)
# unknown variable
expect_error(
data_modify(d, .at = c("Species", "Test"), .modify = as.numeric),
regex = "Variable \"Test\""
)
# unknown variables
expect_error(
data_modify(d, .at = c("Species", "Hi", "Test"), .modify = as.numeric),
regex = "Variables \"Hi\" and \"Test\""
)
# one of .at or .if must be specified
expect_error(
data_modify(d, .modify = as.numeric),
regex = "You need to specify"
)
# function not applicable to factors
expect_error(
data_modify(d, .at = "Species", .modify = function(x) 2 / y + x),
regex = "Error in modifying variable"
)
# function not applicable to factors
expect_error(
data_modify(d, .at = "Species", .modify = function(x) 2 * x),
regex = "Error in modifying variable"
)
# .modify needs to be specified
expect_error(
data_modify(d, .at = "Species", .if = is.factor),
regex = "You need to specify"
)
# newly created variables are not modified by if/at
out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = as.factor)
expect_identical(out$new_length, c(2.8, 2.8, 2.6, 3, 2.8))
# newly created variables are processed by if/at
out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = round)
expect_equal(out$new_length, c(3, 3, 3, 3, 3), ignore_attr = TRUE)
})
Loading