Make a tidy dataset out of freq() or descr() outputs

tb(
  x,
  order = 1,
  drop.var.col = FALSE,
  recalculate = TRUE,
  fct.to.chr = FALSE,
  ...
)

Arguments

x

a freq() or descr() output object.

order

Integer. Useful for grouped results produced with stby or dplyr::group_by. When set to 1 (default), the ordering is done using the grouping variables first. When set to 2, the ordering is done according to the analytical (not grouping) variable. When set to 3, the same ordering as with 2 is used, but the analytical variable is placed in first position. Depending on what function was used for grouping, the results will be different in subtle ways. See Details.

drop.var.col

Logical. For descr objects, drop the variable column. This is possible only when statistics are produced for a single variable; when multiple variables are present, this parameter is ignored. FALSE by default.

recalculate

Logical. For grouped freq results, recalculate percentages so that the proportions across all groups sum up to 1 (i.e. 100%). TRUE by default.

fct.to.chr

Logical. When grouped objects are created with dplyr::group_by, the resulting tibble will have factor columns when the grouping variable itself is a factor. To convert them to character, set this to TRUE. See Details.

...

For internal use only.

Value

A tibble which is constructed following the tidy principles.

Details

stby, which is based on by, initially makes the first variable vary, keeping the other(s) constant. On the other hand, group_by initially keeps the first grouping variable(s) constant, making the last one vary. This affects the ordering of the rows (and, as a result, the cumulative percent columns, if present).

Also, keep in mind that while group_by shows NA groups by default, useNA = TRUE must be used to achieve the same results with stby.

Examples


tb(freq(iris$Species))
#> # A tibble: 4 × 6
#>   Species     freq pct_valid pct_valid_cum pct_tot pct_tot_cum
#>   <fct>      <dbl>     <dbl>         <dbl>   <dbl>       <dbl>
#> 1 setosa        50      33.3          33.3    33.3        33.3
#> 2 versicolor    50      33.3          66.7    33.3        66.7
#> 3 virginica     50      33.3         100      33.3       100  
#> 4 NA             0      NA            NA       0         100  
tb(descr(iris, stats = "common"))
#> # A tibble: 4 × 9
#>   variable      mean    sd   min   med   max n.valid     n pct.valid
#>   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl>     <dbl>
#> 1 Petal.Length  3.76 1.77    1    4.35   6.9     150   150       100
#> 2 Petal.Width   1.20 0.762   0.1  1.3    2.5     150   150       100
#> 3 Sepal.Length  5.84 0.828   4.3  5.8    7.9     150   150       100
#> 4 Sepal.Width   3.06 0.436   2    3      4.4     150   150       100

data("tobacco")
tb(stby(tobacco, tobacco$gender, descr, stats = "fivenum",check.nas = FALSE), 
   order=3)
#> NA detected in grouping variable(s); consider using useNA = TRUE
#> # A tibble: 8 × 7
#>   variable     gender    min     q1   med    q3   max
#>   <chr>        <fct>   <dbl>  <dbl> <dbl> <dbl> <dbl>
#> 1 BMI          F       9.01  23.0   25.9  29.5  39.4 
#> 2 BMI          M       8.83  22.5   25.1  28.0  36.8 
#> 3 age          F      18     34     50    66    80   
#> 4 age          M      18     34     49.5  66    80   
#> 5 cigs.per.day F       0      0      0    10.5  40   
#> 6 cigs.per.day M       0      0      0    11    40   
#> 7 samp.wgts    F       0.861  0.861  1.04  1.05  1.06
#> 8 samp.wgts    M       0.861  0.861  1.04  1.05  1.06
tb(stby(tobacco, tobacco$gender, descr, stats = "common", useNA = TRUE))
#> # A tibble: 12 × 10
#>    gender variable      mean      sd    min   med   max n.valid     n pct.valid
#>    <fct>  <chr>        <dbl>   <dbl>  <dbl> <dbl> <dbl>   <dbl> <dbl>     <dbl>
#>  1 F      BMI          26.1   4.95    9.01  25.9  39.4      475   489      97.1
#>  2 F      age          49.6  18.3    18     50    80        475   489      97.1
#>  3 F      cigs.per.day  6.88 12.0     0      0    40        468   489      95.7
#>  4 F      samp.wgts     1.00  0.0840  0.861  1.04  1.06     489   489     100  
#>  5 M      BMI          25.3   3.98    8.83  25.1  36.8      477   489      97.5
#>  6 M      age          49.6  18.3    18     49.5  80        478   489      97.8
#>  7 M      cigs.per.day  6.72 11.8     0      0    40        475   489      97.1
#>  8 M      samp.wgts     1.00  0.0840  0.861  1.04  1.06     489   489     100  
#>  9 NA     BMI          26.9   3.51   20.2   27.2  32.4       22    22     100  
#> 10 NA     age          50.9  18.2    19     55.5  80         22    22     100  
#> 11 NA     cigs.per.day  6.09  9.72    0      0    28         22    22     100  
#> 12 NA     samp.wgts     1.01  0.0811  0.861  1.05  1.06      22    22     100  

# Compare stby() and group_by() groups' ordering
tb(with(tobacco, stby(diseased, list(gender, smoker), freq, useNA = TRUE)))
#> # A tibble: 18 × 8
#>    gender smoker diseased  freq pct_valid pct_valid_cum pct_tot pct_tot_cum
#>    <fct>  <fct>  <fct>    <dbl>     <dbl>         <dbl>   <dbl>       <dbl>
#>  1 F      Yes    Yes         62       6.2           6.2     6.2         6.2
#>  2 F      Yes    No          85       8.5          14.7     8.5        14.7
#>  3 F      Yes    NA           0      NA            NA       0          14.7
#>  4 M      Yes    Yes         63       6.3          21       6.3        21  
#>  5 M      Yes    No          80       8            29       8          29  
#>  6 M      Yes    NA           0      NA            NA       0          29  
#>  7 NA     Yes    Yes          0       0            29       0          29  
#>  8 NA     Yes    No           8       0.8          29.8     0.8        29.8
#>  9 NA     Yes    NA           0      NA            NA       0          29.8
#> 10 F      No     Yes         49       4.9          34.7     4.9        34.7
#> 11 F      No     No         293      29.3          64      29.3        64  
#> 12 F      No     NA           0      NA            NA       0          64  
#> 13 M      No     Yes         47       4.7          68.7     4.7        68.7
#> 14 M      No     No         299      29.9          98.6    29.9        98.6
#> 15 M      No     NA           0      NA            NA       0          98.6
#> 16 NA     No     Yes          3       0.3          98.9     0.3        98.9
#> 17 NA     No     No          11       1.1         100       1.1       100  
#> 18 NA     No     NA           0      NA            NA       0         100  

if (FALSE) { # \dontrun{
tobacco |> dplyr::group_by(gender, smoker) |> freq(diseased) |> tb()
} # }