Cut a continuous variable into equal-sized groups.

eqcut(
  x,
  ngroups,
  labeling = eqcut.default.labeling,
  withhold = NULL,
  varlabel = if (has.label(x)) label(x) else deparse(substitute(x)),
  quantile.type = 7,
  right = FALSE,
  ...
)

eqcut.default.labeling(x, xcat, which, what, from, to, ...)

Arguments

x

A numeric vector.

ngroups

The number of groups desired.

labeling

A function that produces the category labels (see Details).

withhold

A named list of logical vectors (see Details).

varlabel

A character string to be used as a label for x, or NULL.

quantile.type

An integer from 1 to 9, passed as the type argument to function quantile.

right

Should intervals be right-closed? (passed to cut).

...

Further arguments passed on to function labeling.

xcat

A factor returned by cut.

which, what

Character vectors for labeling the categories in an appropriate way (see Examples).

from, to

Numeric vectors giving the ranges covered by the categories of x.

Value

A factor of the same length as x. There are ngroups levels plus one additional level for each element of withhold.

Details

The function labeling must have the signature function(x, xcat, which, what, from, to, ...) and must return the character vector of factor levels. See below for an example.

The withhold list can be used when x contains special values that should not be considered in the calculation of the quantiles used to create the ngroups categories. Each element of the list is a logical vector that flags the special values of x, and the flagged values are placed in a separate category labeled with the name of that list element. See below for an example.

Functions

  • eqcut.default.labeling(): The default labeling function.

See also

cut, quantile

Examples

x <- sample(100)
table(eqcut(x, 2))
#> 
#> Below median x: [1.00,50.5)  Above median x: [50.5,100] 
#>                          50                          50 
table(eqcut(x, 3))
#> 
#> 1st tertile of x: [1.00,34.0) 2nd tertile of x: [34.0,67.0) 
#>                            33                            33 
#>  3rd tertile of x: [67.0,100] 
#>                            34 
table(eqcut(x, 4))
#> 
#> 1st quartile of x: [1.00,25.8) 2nd quartile of x: [25.8,50.5) 
#>                             25                             25 
#> 3rd quartile of x: [50.5,75.3)  4th quartile of x: [75.3,100] 
#>                             25                             25 
table(eqcut(x, 5))
#> 
#> 1st quintile of x: [1.00,20.8) 2nd quintile of x: [20.8,40.6) 
#>                             20                             20 
#> 3rd quintile of x: [40.6,60.4) 4th quintile of x: [60.4,80.2) 
#>                             20                             20 
#>  5th quintile of x: [80.2,100] 
#>                             20 
table(eqcut(x, 6))
#> 
#> 1st sextile of x: [1.00,17.5) 2nd sextile of x: [17.5,34.0) 
#>                            17                            16 
#> 3rd sextile of x: [34.0,50.5) 4th sextile of x: [50.5,67.0) 
#>                            17                            16 
#> 5th sextile of x: [67.0,83.5)  6th sextile of x: [83.5,100] 
#>                            17                            17 
table(eqcut(x, 7))
#> 
#> 1st septile of x: [1.00,15.1) 2nd septile of x: [15.1,29.3) 
#>                            15                            14 
#> 3rd septile of x: [29.3,43.4) 4th septile of x: [43.4,57.6) 
#>                            14                            14 
#> 5th septile of x: [57.6,71.7) 6th septile of x: [71.7,85.9) 
#>                            14                            14 
#>  7th septile of x: [85.9,100] 
#>                            15 
table(eqcut(x, 8))
#> 
#> 1st octile of x: [1.00,13.4) 2nd octile of x: [13.4,25.8) 
#>                           13                           12 
#> 3rd octile of x: [25.8,38.1) 4th octile of x: [38.1,50.5) 
#>                           13                           12 
#> 5th octile of x: [50.5,62.9) 6th octile of x: [62.9,75.3) 
#>                           12                           13 
#> 7th octile of x: [75.3,87.6)  8th octile of x: [87.6,100] 
#>                           12                           13 

# An example of using eqcut in a table with a custom labeling function and a withhold list.
dat <- expand.grid(id=1:100, sex=c("Male", "Female"), treat=c("Treated", "Placebo"))
dat$age <- runif(nrow(dat), 18, 50)
dat$wt <- exp(rnorm(nrow(dat), log(75 + 10*(dat$sex=="Male")), 0.2))
dat$auc <- ifelse(dat$treat=="Placebo", NA, exp(rnorm(nrow(dat), log(1000), 0.34)))
dat$auc[3] <- NA  # Add a missing value

label(dat$sex) <- "Sex"
label(dat$age) <- "Age"
label(dat$wt)  <- "Weight"
label(dat$auc) <- "AUC"
units(dat$age) <- "y"
units(dat$wt)  <- "kg"
units(dat$auc) <- "ng.h/mL"

w <- list(Placebo=(dat$treat=="Placebo"), Excluded=is.na(dat$auc))
f <- function(x, xcat, which, what, from, to, ...) {
   what <- sub("of ", "of<br/>", what)
   sprintf("%s %s<br/>&ge;%s to &lt;%s",
       which, what, signif_pad(from, 3, FALSE), signif_pad(to, 3, FALSE))
}
table1(~ sex + age + wt | eqcut(auc, 3, f, w), data=dat)
#> <table class="Rtable1">
#> <thead>
#> <tr>
#> <th class='rowlabel firstrow lastrow'></th>
#> <th class='firstrow lastrow'><span class='stratlabel'>1st tertile of<br/>AUC (ng.h/mL)<br/>&ge;383 to &lt;837<br/><span class='stratn'>(N=66)</span></span></th>
#> <th class='firstrow lastrow'><span class='stratlabel'>2nd tertile of<br/>AUC (ng.h/mL)<br/>&ge;837 to &lt;1150<br/><span class='stratn'>(N=66)</span></span></th>
#> <th class='firstrow lastrow'><span class='stratlabel'>3rd tertile of<br/>AUC (ng.h/mL)<br/>&ge;1150 to &lt;2435<br/><span class='stratn'>(N=67)</span></span></th>
#> <th class='firstrow lastrow'><span class='stratlabel'>Placebo<br/><span class='stratn'>(N=200)</span></span></th>
#> <th class='firstrow lastrow'><span class='stratlabel'>Excluded<br/><span class='stratn'>(N=1)</span></span></th>
#> <th class='firstrow lastrow'><span class='stratlabel'>Overall<br/><span class='stratn'>(N=400)</span></span></th>
#> </tr>
#> </thead>
#> <tbody>
#> <tr>
#> <td class='rowlabel firstrow'><span class='varlabel'>Sex</span></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> </tr>
#> <tr>
#> <td class='rowlabel'>Male</td>
#> <td>34 (51.5%)</td>
#> <td>33 (50.0%)</td>
#> <td>32 (47.8%)</td>
#> <td>100 (50.0%)</td>
#> <td>1 (100%)</td>
#> <td>200 (50.0%)</td>
#> </tr>
#> <tr>
#> <td class='rowlabel lastrow'>Female</td>
#> <td class='lastrow'>32 (48.5%)</td>
#> <td class='lastrow'>33 (50.0%)</td>
#> <td class='lastrow'>35 (52.2%)</td>
#> <td class='lastrow'>100 (50.0%)</td>
#> <td class='lastrow'>0 (0%)</td>
#> <td class='lastrow'>200 (50.0%)</td>
#> </tr>
#> <tr>
#> <td class='rowlabel firstrow'><span class='varlabel'>Age<span class='varunits'> (y)</span></span></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> </tr>
#> <tr>
#> <td class='rowlabel'>Mean (SD)</td>
#> <td>35.5 (9.86)</td>
#> <td>33.1 (10.4)</td>
#> <td>35.8 (9.10)</td>
#> <td>34.0 (9.68)</td>
#> <td>42.6 (NA)</td>
#> <td>34.4 (9.75)</td>
#> </tr>
#> <tr>
#> <td class='rowlabel lastrow'>Median [Min, Max]</td>
#> <td class='lastrow'>38.6 [18.1, 50.0]</td>
#> <td class='lastrow'>33.2 [18.5, 49.5]</td>
#> <td class='lastrow'>36.4 [18.5, 49.8]</td>
#> <td class='lastrow'>34.3 [18.0, 49.9]</td>
#> <td class='lastrow'>42.6 [42.6, 42.6]</td>
#> <td class='lastrow'>35.0 [18.0, 50.0]</td>
#> </tr>
#> <tr>
#> <td class='rowlabel firstrow'><span class='varlabel'>Weight<span class='varunits'> (kg)</span></span></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> <td class='firstrow'></td>
#> </tr>
#> <tr>
#> <td class='rowlabel'>Mean (SD)</td>
#> <td>82.8 (16.2)</td>
#> <td>80.4 (16.0)</td>
#> <td>79.5 (15.9)</td>
#> <td>82.7 (17.0)</td>
#> <td>64.0 (NA)</td>
#> <td>81.7 (16.5)</td>
#> </tr>
#> <tr>
#> <td class='rowlabel lastrow'>Median [Min, Max]</td>
#> <td class='lastrow'>83.5 [42.8, 115]</td>
#> <td class='lastrow'>81.3 [52.7, 123]</td>
#> <td class='lastrow'>75.9 [48.2, 129]</td>
#> <td class='lastrow'>81.2 [40.3, 146]</td>
#> <td class='lastrow'>64.0 [64.0, 64.0]</td>
#> <td class='lastrow'>81.1 [40.3, 146]</td>
#> </tr>
#> </tbody>
#> </table>