Simulate Responses for VGLMs and VGAMs

Simulate one or more responses from the distribution corresponding to a fitted model object.

# S3 method for class 'vlm'
simulate(object, nsim = 1, seed = NULL, ...)

Arguments

object

an object representing a fitted model. Usually an object of class vglm-class or vgam-class.

nsim, seed

Same as simulate.

...: additional optional arguments.

Value

Similar to simulate. Note that many VGAM family functions can handle multiple responses. This can result in a longer data frame with more rows (nsim multiplied by n rather than the ordinary n). In the future an argument may be available so that there is always n rows no matter how many responses were inputted.

Details

This is a methods function for simulate and hopefully should behave in a very similar manner. Only VGAM family functions with a simslot slot have been implemented for simulate.

Warning

With multiple response and/or multivariate responses, the order of the elements may differ. For some VGAM families, the order is \(n \times N \times F\), where \(n\) is the sample size, \(N\) is nsim and \(F\) is ncol(fitted(vglmObject)). For other VGAM families, the order is \(n \times F \times N\). An example of each is given below.

Examples

nn <- 10; mysize <- 20; set.seed(123)
bdata <- data.frame(x2 = rnorm(nn))
bdata <- transform(bdata,
  y1   = rbinom(nn, size = mysize, p = logitlink(1+x2, inverse = TRUE)),
  y2   = rbinom(nn, size = mysize, p = logitlink(1+x2, inverse = TRUE)),
  f1   = factor(as.numeric(rbinom(nn, size = 1,
                                  p = logitlink(1+x2, inverse = TRUE)))))
(fit1 <- vglm(cbind(y1, aaa = mysize - y1) ~ x2,  # Matrix response (2-colns)
              binomialff, data = bdata))
#> 
#> Call:
#> vglm(formula = cbind(y1, aaa = mysize - y1) ~ x2, family = binomialff, 
#>     data = bdata)
#> 
#> 
#> Coefficients:
#> (Intercept)          x2 
#>   0.7631782   0.7719893 
#> 
#> Degrees of Freedom: 10 Total; 8 Residual
#> Residual deviance: 7.644748 
#> Log-likelihood: -19.68401 
(fit2 <- vglm(f1 ~ x2, binomialff, model = TRUE, data = bdata)) # Factor response
#> 
#> Call:
#> vglm(formula = f1 ~ x2, family = binomialff, data = bdata, model = TRUE)
#> 
#> 
#> Coefficients:
#> (Intercept)          x2 
#>    3.202873    4.247620 
#> 
#> Degrees of Freedom: 10 Total; 8 Residual
#> Residual deviance: 5.485974 
#> Log-likelihood: -2.742987 

set.seed(123); simulate(fit1, nsim = 8)
#>    sim_1 sim_2 sim_3 sim_4 sim_5 sim_6 sim_7 sim_8
#> 1   0.65  0.40  0.45  0.40  0.70  0.75  0.55  0.50
#> 2   0.55  0.65  0.60  0.50  0.65  0.65  0.80  0.60
#> 3   0.90  0.85  0.85  0.85  0.90  0.80  0.90  0.85
#> 4   0.55  0.70  0.40  0.60  0.75  0.80  0.75  1.00
#> 5   0.55  0.85  0.65  0.90  0.80  0.70  0.60  0.70
#> 6   1.00  0.80  0.85  0.90  0.95  0.95  0.90  0.95
#> 7   0.75  0.80  0.75  0.70  0.85  0.85  0.65  0.80
#> 8   0.60  0.25  0.45  0.35  0.45  0.50  0.55  0.50
#> 9   0.55  0.60  0.60  0.60  0.65  0.40  0.45  0.60
#> 10  0.60  0.40  0.70  0.70  0.50  0.65  0.60  0.75
set.seed(123); c(simulate(fit2, nsim = 3))  # Use c() when model = TRUE
#> $sim_1
#>  [1] 1 1 1 1 1 1 1 0 1 1
#> Levels: 0 1
#> 
#> $sim_2
#>  [1] 0 1 1 1 1 1 1 0 1 0
#> Levels: 0 1
#> 
#> $sim_3
#>  [1] 0 1 1 0 1 1 1 0 1 1
#> Levels: 0 1
#> 

# An n x N x F example
set.seed(123); n <- 100
bdata <- data.frame(x2 = runif(n), x3 = runif(n))
bdata <- transform(bdata, y1 = rnorm(n, 1 + 2 * x2),
                          y2 = rnorm(n, 3 + 4 * x2))
fit1 <- vglm(cbind(y1, y2) ~ x2, binormal(eq.sd = TRUE), data = bdata)
nsim <- 1000  # Number of simulations for each observation
my.sims <- simulate(fit1, nsim = nsim)
dim(my.sims)  # A data frame
#> [1]  200 1000
aaa <- array(unlist(my.sims), c(n, nsim, ncol(fitted(fit1))))  # n by N by F
summary(rowMeans(aaa[, , 1]) - fitted(fit1)[, 1])  # Should be all 0s
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -0.0659074 -0.0242755 -0.0003540  0.0009241  0.0238505  0.0802246 
summary(rowMeans(aaa[, , 2]) - fitted(fit1)[, 2])  # Should be all 0s
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> -0.054374 -0.015796  0.002093  0.004902  0.024698  0.090122 

# An n x F x N example
n <- 100; set.seed(111); nsim <- 1000
zdata <- data.frame(x2 = runif(n))
zdata <- transform(zdata, lambda1 =  loglink(-0.5 + 2 * x2, inverse = TRUE),
                          lambda2 =  loglink( 0.5 + 2 * x2, inverse = TRUE),
                          pstr01  = logitlink( 0,            inverse = TRUE),
                          pstr02  = logitlink(-1.0,          inverse = TRUE))
zdata <- transform(zdata, y1 = rzipois(n, lambda = lambda1, pstr0 = pstr01),
                          y2 = rzipois(n, lambda = lambda2, pstr0 = pstr02))
zip.fit  <- vglm(cbind(y1, y2) ~ x2, zipoissonff, data = zdata, crit = "coef")
my.sims <- simulate(zip.fit, nsim = nsim)
dim(my.sims)  # A data frame
#> [1]  200 1000
aaa <- array(unlist(my.sims), c(n, ncol(fitted(zip.fit)), nsim))  # n by F by N
summary(rowMeans(aaa[, 1, ]) - fitted(zip.fit)[, 1])  # Should be all 0s
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> -0.176434 -0.024770  0.001744 -0.002052  0.023224  0.113060 
summary(rowMeans(aaa[, 2, ]) - fitted(zip.fit)[, 2])  # Should be all 0s
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -0.5001125 -0.0682611 -0.0030149 -0.0007428  0.0745908  0.3279039