Skip to content

Commit

Permalink
Merge pull request #279 from larmarange/fixest
Browse files Browse the repository at this point in the history
support of instrumental variables
  • Loading branch information
larmarange authored Dec 27, 2024
2 parents da37e29 + 91b4245 commit 00c7719
Show file tree
Hide file tree
Showing 27 changed files with 236 additions and 32 deletions.
1 change: 0 additions & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ jobs:
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}
- {os: ubuntu-latest, r: 'oldrel-2'}
- {os: ubuntu-latest, r: 'oldrel-3'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Suggests:
ggeffects (>= 1.3.2),
ggstats (>= 0.2.1),
glmmTMB,
glmtoolbox,
glue,
gt,
gtsummary (>= 2.0.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ S3method(model_get_terms,betareg)
S3method(model_get_terms,brmsfit)
S3method(model_get_terms,cch)
S3method(model_get_terms,default)
S3method(model_get_terms,fixest)
S3method(model_get_terms,glmmTMB)
S3method(model_get_terms,model_fit)
S3method(model_get_weights,default)
Expand Down
11 changes: 11 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
# broom.helpers (development version)

**New supported models**

- support for `glmtoolbox::glmgee()` models (#274)

**New features**

- support of instrumental variables for `fixest` models (#279)
- new argument `instrumental_suffix` for `model_list_variables()`,
`tidy_add_variable_labels()` and `tidy_plus_plus()`

**Fixes**

- variable labels are now returned by `model_list_variables()` for `svycoxph`
models (#275)
- compatibility with R version 4.1 minimum (#276)
- fix for `tidy_add_n()` with models with a subset argument (#278)

# broom.helpers 1.17.0

Expand Down
2 changes: 1 addition & 1 deletion R/broom.helpers-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ utils::globalVariables(c("."))
dplyr::any_of(
c(
"y.level", "component", "term", "original_term", "variable",
"var_label", "var_class", "var_type",
"instrumental", "var_label", "var_class", "var_type",
"var_nlevels", "header_row", "contrasts", "contrasts_type",
"reference_row", "label", "n_obs", "n_ind", "n_event", "exposure"
)
Expand Down
3 changes: 1 addition & 2 deletions R/model_compute_terms_contributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,7 @@ model_compute_terms_contributions.default <- function(model) {
} # stop

# continuous variables converted to 1 to force positive values
d <- model |> purrr::pluck("data")
if (is.null(d)) d <- model |> model_get_model_frame()
d <- model |> model_get_model_frame()

if (is.null(d)) {
return(NULL)
Expand Down
5 changes: 4 additions & 1 deletion R/model_get_model_frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,5 +90,8 @@ model_get_model_frame.model_fit <- function(model) {
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.fixest <- function(model) {
stats::model.frame.default(model$fml, data = get(model$call$data, model$call_env))
stats::model.frame.default(
model_get_terms(model),
data = get(model$call$data, model$call_env)
)
}
10 changes: 9 additions & 1 deletion R/model_get_model_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,16 @@ model_get_model_matrix.model_fit <- function(model, ...) {

#' @export
#' @rdname model_get_model_matrix
#' @details
#' For `fixest` models, will recreate a model matrix with both main variables
#' and instrumental variables. For more options, see
#' [fixest::model.matrix.fixest].
model_get_model_matrix.fixest <- function(model, ...) {
stats::model.matrix.default(model$fml, data = get(model$call$data, model$call_env), ...)
stats::model.matrix.default(
model_get_terms(model),
data = get(model$call$data, model$call_env),
...
)
}

#' @export
Expand Down
28 changes: 26 additions & 2 deletions R/model_get_terms.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,34 @@ model_get_terms.betareg <- function(model) {
}

#' @export
#' @rdname model_get_model_matrix
model_get_terms.cch <- function(model, ...) {
#' @rdname model_get_terms
model_get_terms.cch <- function(model) {
stats::terms.formula(
model$call$formula |> stats::formula(),
data = model |> model_get_model_frame()
)
}

#' @export
#' @rdname model_get_terms
#' @details
#' For `fixest` models, return a term object combining main variables and
#' instrumental variables.
#'
model_get_terms.fixest <- function(model) {
fml <- model$fml
fiv <- model$iv_endo_fml

if (is.null(fiv)) {
f <- fml
} else {
f <-
paste(
deparse(fml),
"+",
deparse(fiv[[3]])
) |>
stats::as.formula()
}
stats::terms(f)
}
44 changes: 36 additions & 8 deletions R/model_list_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#' If `TRUE`, will return only "variable" column.
#' @param add_var_type (`logical`)\cr
#' If `TRUE`, add `var_nlevels` and `var_type` columns.
#' @param instrumental_suffix (`string`)\cr
#' Suffix added to variable labels for instrumental variables (`fixest` models).
#' `NULL` to add nothing.
#' @return
#' A tibble with three columns:
#' * `variable`: the corresponding variable
Expand Down Expand Up @@ -54,15 +57,21 @@
#' ) |>
#' model_list_variables()
#' }
model_list_variables <- function(model, labels = NULL,
only_variable = FALSE, add_var_type = FALSE) {
model_list_variables <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
UseMethod("model_list_variables")
}

#' @rdname model_list_variables
#' @export
model_list_variables.default <- function(model, labels = NULL,
only_variable = FALSE, add_var_type = FALSE) {
model_list_variables.default <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
model_frame <- model_get_model_frame(model)
model_terms <- model_get_terms(model)

Expand Down Expand Up @@ -105,6 +114,19 @@ model_list_variables.default <- function(model, labels = NULL,
return(res$variable)
}

# specific case for instrumental variables
if (inherits(model, "fixest") && !is.null(instrumental_suffix)) {
iv <- all.vars(model$iv_endo_fml)
res <- res |>
dplyr::mutate(
var_label = dplyr::if_else(
.data$variable %in% iv,
paste0(.data$var_label, instrumental_suffix),
.data$var_label
)
)
}

if (add_var_type) {
return(.add_var_type(res, model))
}
Expand All @@ -115,8 +137,11 @@ model_list_variables.default <- function(model, labels = NULL,

#' @rdname model_list_variables
#' @export
model_list_variables.lavaan <- function(model, labels = NULL,
only_variable = FALSE, add_var_type = FALSE) {
model_list_variables.lavaan <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
res <- tibble::tibble(
variable = .clean_backticks(unique(model@ParTable$lhs))
) |>
Expand Down Expand Up @@ -150,8 +175,11 @@ model_list_variables.lavaan <- function(model, labels = NULL,

#' @rdname model_list_variables
#' @export
model_list_variables.logitr <- function(model, labels = NULL,
only_variable = FALSE, add_var_type = FALSE) {
model_list_variables.logitr <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
res <- model_list_variables.default(model, labels, FALSE)

if (!is.null(model$data$scalePar)) {
Expand Down
16 changes: 15 additions & 1 deletion R/tidy_add_reference_rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ tidy_add_reference_rows <- function(
x$var_label <- NA_character_
} # temporary populate it

has_instrumental <- "instrumental" %in% names(x)
if (!has_instrumental) {
x$instrumental <- NA
} # temporary populate it

x <- x |>
dplyr::mutate(
reference_row = dplyr::if_else(
Expand Down Expand Up @@ -186,7 +191,11 @@ tidy_add_reference_rows <- function(
ref_rows <- terms_levels |>
dplyr::filter(.data$reference) |>
dplyr::mutate(reference_row = TRUE) |>
dplyr::select(dplyr::all_of(c("term", "variable", "label", "reference_row", "rank")))
dplyr::select(
dplyr::all_of(
c("term", "variable", "label", "reference_row", "rank")
)
)

if (!"label" %in% names(x)) {
ref_rows <- ref_rows |> dplyr::select(-all_of("label"))
Expand All @@ -204,6 +213,7 @@ tidy_add_reference_rows <- function(
var_class = dplyr::first(.data$var_class),
var_type = dplyr::first(.data$var_type),
var_label = dplyr::first(.data$var_label),
instrumental = dplyr::first(.data$instrumental),
var_nlevels = dplyr::first(.data$var_nlevels),
effect = dplyr::first(.data$effect),
contrasts = dplyr::first(.data$contrasts),
Expand Down Expand Up @@ -244,6 +254,10 @@ tidy_add_reference_rows <- function(
x <- x |> dplyr::select(-dplyr::all_of("var_label"))
}

if (!has_instrumental) {
x <- x |> dplyr::select(-dplyr::all_of("instrumental"))
}

x |>
dplyr::arrange(.data$rank) |>
dplyr::select(-dplyr::all_of("rank")) |>
Expand Down
10 changes: 9 additions & 1 deletion R/tidy_add_variable_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param labels ([`formula-list-selector`][gtsummary::syntax])\cr
#' An optional named list or a named vector of custom variable labels.
#' @param instrumental_suffix (`string`)\cr
#' Suffix added to variable labels for instrumental variables (`fixest` models).
#' `NULL` to add nothing.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
Expand All @@ -42,6 +45,7 @@
tidy_add_variable_labels <- function(x,
labels = NULL,
interaction_sep = " * ",
instrumental_suffix = " (instrumental)",
model = tidy_get_model(x)) {
if (is.null(model)) {
cli::cli_abort(c(
Expand Down Expand Up @@ -85,7 +89,11 @@ tidy_add_variable_labels <- function(x,
.update_vector(additional_labels)

# add the list of variables from model_list_variables
variable_list <- model_list_variables(model, labels = labels)
variable_list <- model_list_variables(
model,
labels = labels,
instrumental_suffix = instrumental_suffix
)
additional_labels <- variable_list$var_label
names(additional_labels) <- variable_list$variable
var_labels <- var_labels |>
Expand Down
17 changes: 17 additions & 0 deletions R/tidy_identify_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#'
#' For dichotomous and categorical variables, `var_nlevels` corresponds to the number
#' of original levels in the corresponding variables.
#'
#' For `fixest` models, a new column `instrumental` is added to indicate
#' instrumental variables.
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param model (a model object, e.g. `glm`)\cr
Expand Down Expand Up @@ -65,6 +68,20 @@ tidy_identify_variables <- function(x, model = tidy_get_model(x),

.attributes <- .save_attributes(x)

# specific case for fixest models to handle instrumental variables
if (inherits(model, "fixest")) {
x <- x |>
dplyr::mutate(
original_term = .data$term,
instrumental = .data$term |> stringr::str_starts("fit_"),
term = dplyr::if_else(
.data$term |> stringr::str_starts("fit_"),
.data$term |> stringr::str_sub(5),
.data$term
)
)
}

# specific case for marginal means / effects / predictions / contrasts
if (
isTRUE(
Expand Down
7 changes: 6 additions & 1 deletion R/tidy_plus_plus.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
#' (respectively named `"model_frame"` and `"model_matrix"`) and passed through.
#' @param variable_labels ([`formula-list-selector`][gtsummary::syntax])\cr
#' A named list or a named vector of custom variable labels.
#' @param instrumental_suffix (`string`)\cr
#' Suffix added to variable labels for instrumental variables (`fixest` models).
#' `NULL` to add nothing.
#' @param term_labels (`list` or `vector`)\cr
#' A named list or a named vector of custom term labels.
#' @param interaction_sep (`string`)\cr
Expand Down Expand Up @@ -153,6 +156,7 @@ tidy_plus_plus <- function(model,
exponentiate = FALSE,
model_matrix_attr = TRUE,
variable_labels = NULL,
instrumental_suffix = " (instrumental)",
term_labels = NULL,
interaction_sep = " * ",
categorical_terms_pattern = "{level}",
Expand Down Expand Up @@ -222,7 +226,8 @@ tidy_plus_plus <- function(model,
res <- res |>
tidy_add_variable_labels(
labels = variable_labels,
interaction_sep = interaction_sep
interaction_sep = interaction_sep,
instrumental_suffix = instrumental_suffix
) |>
tidy_add_term_labels(
labels = term_labels,
Expand Down
3 changes: 2 additions & 1 deletion data-raw/DATASET.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ supported_models <-
"`pscl::zeroinfl()`", "Use `tidy_zeroinfl()` as `tidy_fun`.",
"`pscl::hurdle()`", "Use `tidy_zeroinfl()` as `tidy_fun`.",
"`betareg::betareg()`", "Use `tidy_parameters()` as `tidy_fun` with `component` argument to control with coefficients to return. `broom::tidy()` does not support the `exponentiate` argument for betareg models, use `tidy_parameters()` instead.", # nolint
"`survival::cch()`", "`Experimental support."
"`survival::cch()`", "`Experimental support.",
"`glmtoolbox::glmgee()`", "",
) |>
dplyr::arrange(.data$model, .locale = "en")

Expand Down
Binary file modified data/supported_models.rda
Binary file not shown.
9 changes: 5 additions & 4 deletions man/model_get_model_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 00c7719

Please sign in to comment.