`data_tabulate()` gains `by` argument for crosstables #481

strengejacke · 2024-02-13T10:36:37Z

This also adds an `include_na` argument to include or omit missing values.

library(datawizard)
data(efc)

set.seed(123)
efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))
efc$e16sex[sample.int(nrow(efc), 5)] <- NA

data_tabulate(efc$c172code)
#> carer's level of education (efc$c172code) <numeric>
#> # total N=100 valid N=90
#> 
#> Value |  N | Raw % | Valid % | Cumulative %
#> ------+----+-------+---------+-------------
#> 1     |  8 |  8.00 |    8.89 |         8.89
#> 2     | 66 | 66.00 |   73.33 |        82.22
#> 3     | 16 | 16.00 |   17.78 |       100.00
#> <NA>  | 10 | 10.00 |    <NA> |         <NA>

data_tabulate(efc$c172code, include_na = FALSE)
#> carer's level of education (efc$c172code) <numeric>
#> # total N=90 valid N=90
#> 
#> Value |  N | Raw % | Valid % | Cumulative %
#> ------+----+-------+---------+-------------
#> 1     |  8 |  8.89 |    8.89 |         8.89
#> 2     | 66 | 73.33 |   73.33 |        82.22
#> 3     | 16 | 17.78 |   17.78 |       100.00

data_tabulate(efc$c172code, weights = efc$weights)
#> carer's level of education (efc$c172code) <numeric>
#> # total N=105 valid N=92 (weighted)
#> 
#> Value |  N | Raw % | Valid % | Cumulative %
#> ------+----+-------+---------+-------------
#> 1     | 10 |  9.52 |   10.87 |        10.87
#> 2     | 67 | 63.81 |   72.83 |        83.70
#> 3     | 15 | 14.29 |   16.30 |       100.00
#> <NA>  | 13 | 12.38 |    <NA> |         <NA>

data_tabulate(efc$c172code, include_na = FALSE, weights = efc$weights)
#> carer's level of education (efc$c172code) <numeric>
#> # total N=92 valid N=92 (weighted)
#> 
#> Value |  N | Raw % | Valid % | Cumulative %
#> ------+----+-------+---------+-------------
#> 1     | 10 | 10.87 |   10.87 |        10.87
#> 2     | 67 | 72.83 |   72.83 |        83.70
#> 3     | 15 | 16.30 |   16.30 |       100.00


data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell")
#> efc$c172code |       male |     female |     <NA> | Total
#> -------------+------------+------------+----------+------
#> 1            |  5  (5.0%) |  2  (2.0%) | 1 (1.0%) |     8
#> 2            | 31 (31.0%) | 33 (33.0%) | 2 (2.0%) |    66
#> 3            |  4  (4.0%) | 11 (11.0%) | 1 (1.0%) |    16
#> <NA>         |  5  (5.0%) |  4  (4.0%) | 1 (1.0%) |    10
#> -------------+------------+------------+----------+------
#> Total        |         45 |         50 |        5 |   100

data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", include_na = FALSE)
#> efc$c172code |       male |     female | Total
#> -------------+------------+------------+------
#> 1            |  5  (5.8%) |  2  (2.3%) |     7
#> 2            | 31 (36.0%) | 33 (38.4%) |    64
#> 3            |  4  (4.7%) | 11 (12.8%) |    15
#> -------------+------------+------------+------
#> Total        |         40 |         46 |    86

data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", weights = efc$weights)
#> efc$c172code |       male |     female |     <NA> | Total
#> -------------+------------+------------+----------+------
#> 1            |  5  (4.8%) |  3  (2.9%) | 2 (1.9%) |    10
#> 2            | 32 (30.5%) | 32 (30.5%) | 3 (2.9%) |    67
#> 3            |  3  (2.9%) | 11 (10.5%) | 1 (1.0%) |    15
#> <NA>         |  8  (7.6%) |  5  (4.8%) | 1 (1.0%) |    14
#> -------------+------------+------------+----------+------
#> Total        |         48 |         51 |        7 |   105

data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", include_na = FALSE, weights = efc$weights)
#> efc$c172code |       male |     female | Total
#> -------------+------------+------------+------
#> 1            |  5  (5.8%) |  3  (3.5%) |     8
#> 2            | 32 (37.2%) | 32 (37.2%) |    64
#> 3            |  3  (3.5%) | 11 (12.8%) |    14
#> -------------+------------+------------+------
#> Total        |         40 |         46 |    86


data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")
#> c172code |       male |     female |      <NA> | Total
#> ---------+------------+------------+-----------+------
#> 1        |  5 (62.5%) |  2 (25.0%) | 1 (12.5%) |     8
#> 2        | 31 (47.0%) | 33 (50.0%) | 2  (3.0%) |    66
#> 3        |  4 (25.0%) | 11 (68.8%) | 1  (6.2%) |    16
#> <NA>     |  5 (50.0%) |  4 (40.0%) | 1 (10.0%) |    10
#> ---------+------------+------------+-----------+------
#> Total    |         45 |         50 |         5 |   100

data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE)
#> c172code |       male |     female | Total
#> ---------+------------+------------+------
#> 1        |  5 (71.4%) |  2 (28.6%) |     7
#> 2        | 31 (48.4%) | 33 (51.6%) |    64
#> 3        |  4 (26.7%) | 11 (73.3%) |    15
#> ---------+------------+------------+------
#> Total    |         40 |         46 |    86

data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", weights = efc$weights)
#> c172code |       male |     female |      <NA> | Total
#> ---------+------------+------------+-----------+------
#> 1        |  5 (50.0%) |  3 (30.0%) | 2 (20.0%) |    10
#> 2        | 32 (47.8%) | 32 (47.8%) | 3  (4.5%) |    67
#> 3        |  3 (20.0%) | 11 (73.3%) | 1  (6.7%) |    15
#> <NA>     |  8 (57.1%) |  5 (35.7%) | 1  (7.1%) |    14
#> ---------+------------+------------+-----------+------
#> Total    |         48 |         51 |         7 |   105

data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights)
#> c172code |       male |     female | Total
#> ---------+------------+------------+------
#> 1        |  5 (62.5%) |  3 (37.5%) |     8
#> 2        | 32 (50.0%) | 32 (50.0%) |    64
#> 3        |  3 (21.4%) | 11 (78.6%) |    14
#> ---------+------------+------------+------
#> Total    |         40 |         46 |    86


data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")
#> c172code |       male |     female |      <NA> | Total
#> ---------+------------+------------+-----------+------
#> 1        |  5 (11.1%) |  2  (4.0%) | 1 (20.0%) |     8
#> 2        | 31 (68.9%) | 33 (66.0%) | 2 (40.0%) |    66
#> 3        |  4  (8.9%) | 11 (22.0%) | 1 (20.0%) |    16
#> <NA>     |  5 (11.1%) |  4  (8.0%) | 1 (20.0%) |    10
#> ---------+------------+------------+-----------+------
#> Total    |         45 |         50 |         5 |   100

data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE)
#> c172code |       male |     female | Total
#> ---------+------------+------------+------
#> 1        |  5 (12.5%) |  2  (4.3%) |     7
#> 2        | 31 (77.5%) | 33 (71.7%) |    64
#> 3        |  4 (10.0%) | 11 (23.9%) |    15
#> ---------+------------+------------+------
#> Total    |         40 |         46 |    86

efc |>
  data_group("e42dep") |>
  data_tabulate("c172code", by = "e16sex", proportions = "row")
#> Grouped by e42dep (1)
#> 
#> c172code |       male |       <NA> | Total
#> ---------+------------+------------+------
#> 2        | 2 (100.0%) | 0   (0.0%) |     2
#> <NA>     |   0 (NaN%) |   0 (NaN%) |     0
#> ---------+------------+------------+------
#> Total    |          2 |          0 |     2
#> 
#> Grouped by e42dep (2)
#> 
#> c172code |      male |    female |      <NA> | Total
#> ---------+-----------+-----------+-----------+------
#> 2        | 2 (50.0%) | 2 (50.0%) | 0  (0.0%) |     4
#> <NA>     |  0 (NaN%) |  0 (NaN%) |  0 (NaN%) |     0
#> ---------+-----------+-----------+-----------+------
#> Total    |         2 |         2 |         0 |     4
#> 
#> Grouped by e42dep (3)
#> 
#> c172code |      male |     female |      <NA> | Total
#> ---------+-----------+------------+-----------+------
#> 1        | 2 (50.0%) |  2 (50.0%) | 0  (0.0%) |     4
#> 2        | 4 (25.0%) | 11 (68.8%) | 1  (6.2%) |    16
#> 3        | 1 (16.7%) |  5 (83.3%) | 0  (0.0%) |     6
#> <NA>     | 1 (50.0%) |  0  (0.0%) | 1 (50.0%) |     2
#> ---------+-----------+------------+-----------+------
#> Total    |         8 |         18 |         2 |    28
#> 
#> Grouped by e42dep (4)
#> 
#> c172code |       male |     female |      <NA> | Total
#> ---------+------------+------------+-----------+------
#> 1        |  3 (75.0%) |  0  (0.0%) | 1 (25.0%) |     4
#> 2        | 23 (54.8%) | 18 (42.9%) | 1  (2.4%) |    42
#> 3        |  3 (30.0%) |  6 (60.0%) | 1 (10.0%) |    10
#> <NA>     |  3 (42.9%) |  4 (57.1%) | 0  (0.0%) |     7
#> ---------+------------+------------+-----------+------
#> Total    |         32 |         28 |         3 |    63
#> 
#> Grouped by e42dep (NA)
#> 
#> c172code |       male |     female |       <NA> | Total
#> ---------+------------+------------+------------+------
#> 2        | 0   (0.0%) | 2 (100.0%) | 0   (0.0%) |     2
#> <NA>     | 1 (100.0%) | 0   (0.0%) | 0   (0.0%) |     1
#> ---------+------------+------------+------------+------
#> Total    |          1 |          2 |          0 |     3

# errors
data_tabulate(efc$c172code, by = "e16sex")
#> Error: If `by` is a string indicating a variable name, `x` must be a data
#>   frame.

data_tabulate(efc$c172code, by = efc$e16sex[-1])
#> Error: Length of `by` must be equal to length of `x`.

data_tabulate(efc, "c172code", by = efc$e16sex[-1])
#> Error: Length of `by` must be equal to number of rows in `x`.

data_tabulate(efc, "c172code", by = "c16sex")
#> Error: The variable specified in `by` was not found in `x`. Did you mean
#>   "e16sex"?

data_tabulate(efc, "c172code", by = c("e16sex", "e42dep"))
#> Error: If `by` is a string indicating a variable name, `by` must be of length
#>   1.
#>   You may use `data_group()` to group by multiple variables, then call
#>   `data_tabulate()`.

<sup>Created on 2024-02-13 with reprex v2.1.0</sup>

codecov · 2024-02-13T12:43:46Z

Codecov Report

Attention: 17 lines in your changes are missing coverage. Please review.

Comparison is base (3358b3e) 89.91% compared to head (42fcaf1) 90.34%.

Files	Patch %	Lines
R/data_xtabulate.R	90.05%	17 Missing ⚠️

Additional details and impacted files

@@            Coverage Diff             @@
##             main     #481      +/-   ##
==========================================
+ Coverage   89.91%   90.34%   +0.42%     
==========================================
  Files          72       73       +1     
  Lines        5485     5696     +211     
==========================================
+ Hits         4932     5146     +214     
+ Misses        553      550       -3

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

strengejacke · 2024-02-13T14:11:55Z

@etiennebacher WDYT? This enhances `data_tabulate()` so that it can print crosstables via the `by` argument. I think it's better to get crosstables from `data_tabulate()` than to add another function.

etiennebacher

Looks great, thanks! I just have some minor comments. I also think it's better to have an additional argument than a separate function for crosstables.

NEWS.md

R/data_tabulate.R

Co-authored-by: Etienne Bacher <[email protected]>

etiennebacher

Thanks!

strengejacke · 2024-02-13T21:28:59Z

Not sure why the snapshot tests did not render properly; they should work now. Once the tests pass, I'll merge.

strengejacke added 14 commits February 13, 2024 11:41

data_tabluate() gains by argument for crosstables

d2fd1e2

fix

2c594b4

fix

e44b9de

fix

57579a9

fix

a9a13e5

fix

b6c9c73

fix

32b9dd1

fix

97b1f83

fix

2ee99fd

fix

42305eb

fix

3dec51d

version

38f2a34

fix

3d8013b

update tests

5de775d

strengejacke added 8 commits February 13, 2024 14:05

fixes update tests

c5fdc83

docs, add print_html methods

f7bcb7d

update news

652df44

code structure

cd14e60

fixes

0d69566

add tests

f34e78c

print markdown method

755f7ca

add tests for markdown print

07e7df2

strengejacke requested a review from etiennebacher February 13, 2024 14:10

strengejacke added 5 commits February 13, 2024 15:15

lintr

06146e5

align values in tables

12eb590

tests for HTML

62ded15

add test

03b2d0e

use same column as rowname

2acdfa1

etiennebacher changed the title ~~data_tabluate() gains by argument for crosstables~~ data_tabulate() gains by argument for crosstables Feb 13, 2024

etiennebacher requested changes Feb 13, 2024

View reviewed changes

NEWS.md Outdated Show resolved Hide resolved

R/data_tabulate.R Outdated Show resolved Hide resolved

R/data_tabulate.R Outdated Show resolved Hide resolved

strengejacke and others added 3 commits February 13, 2024 20:52

Update NEWS.md

3056bd5

Co-authored-by: Etienne Bacher <[email protected]>

address comments

5f733a1

cell -> full

8a8863d

etiennebacher approved these changes Feb 13, 2024

View reviewed changes

update snapshots

42fcaf1

one more test

c173c1c

strengejacke merged commit be6e2bf into main Feb 13, 2024
24 of 25 checks passed

strengejacke deleted the crosstable branch February 13, 2024 21:43

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

`data_tabulate()` gains `by` argument for crosstables #481

`data_tabulate()` gains `by` argument for crosstables #481

strengejacke commented Feb 13, 2024 •

edited

Loading

codecov bot commented Feb 13, 2024 •

edited

Loading

strengejacke commented Feb 13, 2024

etiennebacher left a comment •

edited

Loading

etiennebacher left a comment

strengejacke commented Feb 13, 2024

data_tabulate() gains by argument for crosstables #481

data_tabulate() gains by argument for crosstables #481

Conversation

strengejacke commented Feb 13, 2024 • edited Loading

codecov bot commented Feb 13, 2024 • edited Loading

Codecov Report

strengejacke commented Feb 13, 2024

etiennebacher left a comment • edited Loading

Choose a reason for hiding this comment

etiennebacher left a comment

Choose a reason for hiding this comment

strengejacke commented Feb 13, 2024

`data_tabulate()` gains `by` argument for crosstables #481

`data_tabulate()` gains `by` argument for crosstables #481

strengejacke commented Feb 13, 2024 •

edited

Loading

codecov bot commented Feb 13, 2024 •

edited

Loading

etiennebacher left a comment •

edited

Loading