Computes log marginal likelihood via bridge sampling.
Usage
bridge_sampler(samples, ...)

# S3 method for class 'CmdStanMCMC'
bridge_sampler(
  samples = NULL,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  maxiter = 1000,
  silent = FALSE,
  verbose = FALSE,
  ...
)

# S3 method for class 'stanfit'
bridge_sampler(
  samples = NULL,
  stanfit_model = samples,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  maxiter = 1000,
  silent = FALSE,
  verbose = FALSE,
  ...
)

# S3 method for class 'mcmc.list'
bridge_sampler(
  samples = NULL,
  log_posterior = NULL,
  ...,
  data = NULL,
  lb = NULL,
  ub = NULL,
  repetitions = 1,
  param_types = rep("real", ncol(samples[[1]])),
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  packages = NULL,
  varlist = NULL,
  envir = .GlobalEnv,
  rcppFile = NULL,
  maxiter = 1000,
  silent = FALSE,
  verbose = FALSE
)

# S3 method for class 'mcmc'
bridge_sampler(
  samples = NULL,
  log_posterior = NULL,
  ...,
  data = NULL,
  lb = NULL,
  ub = NULL,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  packages = NULL,
  varlist = NULL,
  envir = .GlobalEnv,
  rcppFile = NULL,
  maxiter = 1000,
  param_types = rep("real", ncol(samples)),
  silent = FALSE,
  verbose = FALSE
)

# S3 method for class 'matrix'
bridge_sampler(
  samples = NULL,
  log_posterior = NULL,
  ...,
  data = NULL,
  lb = NULL,
  ub = NULL,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  packages = NULL,
  varlist = NULL,
  envir = .GlobalEnv,
  rcppFile = NULL,
  maxiter = 1000,
  param_types = rep("real", ncol(samples)),
  silent = FALSE,
  verbose = FALSE
)

# S3 method for class 'stanreg'
bridge_sampler(
  samples,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  maxiter = 1000,
  silent = FALSE,
  verbose = FALSE,
  ...
)

# S3 method for class 'rjags'
bridge_sampler(
  samples = NULL,
  log_posterior = NULL,
  ...,
  data = NULL,
  lb = NULL,
  ub = NULL,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  packages = NULL,
  varlist = NULL,
  envir = .GlobalEnv,
  rcppFile = NULL,
  maxiter = 1000,
  silent = FALSE,
  verbose = FALSE
)

# S3 method for class 'runjags'
bridge_sampler(
  samples = NULL,
  log_posterior = NULL,
  ...,
  data = NULL,
  lb = NULL,
  ub = NULL,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  packages = NULL,
  varlist = NULL,
  envir = .GlobalEnv,
  rcppFile = NULL,
  maxiter = 1000,
  silent = FALSE,
  verbose = FALSE
)

# S3 method for class 'MCMC_refClass'
bridge_sampler(
  samples,
  repetitions = 1,
  method = "normal",
  cores = 1,
  use_neff = TRUE,
  maxiter = 1000,
  silent = FALSE,
  verbose = FALSE,
  ...
)

Arguments
- samples: an mcmc.list object, a fitted stanfit object, a stanreg object, an rjags object, a runjags object, or a matrix with posterior samples (whose colnames need to correspond to the parameter names in lb and ub).
- ...: additional arguments passed to log_posterior. Ignored for the stanfit and stanreg methods.
- repetitions: number of repetitions.
- method: either "normal" or "warp3".
- cores: number of cores used for evaluating log_posterior. On unix-like systems (where .Platform$OS.type == "unix" evaluates to TRUE; e.g., Linux and Mac OS), forking via mclapply is used, so the elements needed for evaluation should be in the .GlobalEnv. On other systems (e.g., Windows), makeCluster is used and the further arguments specified below apply.
- use_neff: Logical. If TRUE, the effective sample size (rather than the nominal sample size) is used in the optimal bridge function and in the iterative scheme's uncertainty calculations, so that the MCSE computation accounts for autocorrelation in the MCMC samples. Default is TRUE. If FALSE, the nominal sample size is used instead. If samples is a matrix, it is assumed that the matrix contains the samples of one chain in order. If the samples come from more than one chain, we recommend using an mcmc.list object for optimal performance.
- maxiter: maximum number of iterations for the iterative updating scheme. Default is 1,000 to avoid infinite loops.
- silent: Boolean which determines whether the number of iterations of the updating scheme is printed to the console. Default is FALSE.
- verbose: Boolean. Should internal debug information be printed to the console? Default is FALSE.
- stanfit_model: for the stanfit method, an additional object of class "stanfit" with the same model as samples, which will be used for evaluating log_posterior (i.e., it does not need to contain any samples). The default is to use samples. In case samples was compiled in a different R session or on another computer with a different OS or setup, the samples model usually cannot be used for evaluation. In this case, one can compile the model on the current computer with iter = 0 and pass it here (this usually needs to be done before samples is loaded).
- log_posterior: function or name of a function that takes a parameter vector and the data as input and returns the log of the unnormalized posterior density (i.e., a scalar value). If the function name is passed, the function should exist in the .GlobalEnv. For special behavior if cores > 1, see Details.
- data: data object which is used in log_posterior.
- lb: named vector with lower bounds for parameters.
- ub: named vector with upper bounds for parameters.
- param_types: character vector of length ncol(samples) with elements "real", "simplex", or "circular". For all regular bounded or unbounded continuous parameters, this should just be "real". However, parameters which lie on a simplex or on the circle should be noted here. Simplex parameters are bounded below by zero and collectively sum to one, such as weights in a mixture model; for these, the stick-breaking transformation is performed as described in the Stan reference manual. Circular variables are given a numerical representation for which the normal distribution is likely a good fit. Only available for bridge_sampler.matrix; see the sketch after this list.
- packages: character vector with names of packages needed for evaluating log_posterior in parallel (only relevant if cores > 1 and .Platform$OS.type != "unix").
- varlist: character vector with names of variables needed for evaluating log_posterior (only needed if cores > 1 and .Platform$OS.type != "unix", as these objects will be exported to the nodes). These objects need to exist in envir.
- envir: specifies the environment for varlist (only needed if cores > 1 and .Platform$OS.type != "unix", as these objects will be exported to the nodes). Default is .GlobalEnv.
- rcppFile: in case cores > 1 and log_posterior is an Rcpp function, rcppFile specifies the path to the cpp file (which will be compiled on all cores).
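As an illustration, a minimal sketch of specifying param_types for a hypothetical samples matrix whose first two columns are unconstrained parameters and whose last three columns are mixture weights lying on a simplex (the column layout and mixture context are invented for this sketch):

# columns 1-2 are unconstrained reals, columns 3-5 form a three-element simplex
param_types <- c(rep("real", 2), rep("simplex", 3))
# pass this alongside matching named lb/ub vectors:
# bridge_sampler(samples = samples, ..., param_types = param_types)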
Value
If repetitions = 1, returns a list of class "bridge" with components:
- logml: estimate of the log marginal likelihood.
- niter: number of iterations of the iterative updating scheme.
- method: bridge sampling method that was used to obtain the estimate.
- q11: log posterior evaluations for posterior samples.
- q12: log proposal evaluations for posterior samples.
- q21: log posterior evaluations for samples from the proposal.
- q22: log proposal evaluations for samples from the proposal.
- mcse_logml: Monte Carlo standard error of logml on the log scale (Micaletto & Vehtari, 2025).
If repetitions > 1, returns a list of class "bridge_list" with components:
- logml: numeric vector of log marginal likelihood estimates.
- niter: numeric vector with the number of iterations of the iterative updating scheme for each repetition.
- method: bridge sampling method that was used to obtain the estimates.
- repetitions: number of repetitions.
- mcse_logml: numeric vector of Monte Carlo standard errors on the log scale (Micaletto & Vehtari, 2025), one per repetition.
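As a minimal sketch of working with the returned objects (using the samples, log_density, lb, and ub objects defined in Example 1 below; the object names fit and fits are illustrative):

# single run: extract the estimate and its Monte Carlo standard error
fit <- bridge_sampler(samples = samples, log_posterior = log_density,
                      data = NULL, lb = lb, ub = ub, silent = TRUE)
fit$logml       # log marginal likelihood estimate
fit$mcse_logml  # MCSE of logml on the log scale
fit$niter       # iterations used by the updating scheme

# repetitions > 1: logml and mcse_logml become vectors, one entry per repetition
fits <- bridge_sampler(samples = samples, log_posterior = log_density,
                       data = NULL, lb = lb, ub = ub,
                       repetitions = 5, silent = TRUE)
summary(fits$logml)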
Details
Bridge sampling is implemented as described in Meng and Wong (1996,
see equation 4.1) using the "optimal" bridge function. When method =
"normal", the proposal distribution is a multivariate normal distribution
with mean vector equal to the sample mean vector of samples and
covariance matrix equal to the sample covariance matrix of samples.
For a recent tutorial on bridge sampling, see Gronau et al. (in press).
When method = "warp3", the proposal distribution is a standard
multivariate normal distribution and the posterior distribution is "warped"
(Meng & Schilling, 2002) so that it has the same mean vector, covariance
matrix, and skew as the samples. method = "warp3" takes approximately
twice as long as method = "normal".
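For instance, with the objects from Example 1 below, the two proposals can be compared directly (a sketch; both calls use the interface documented above):

b_normal <- bridge_sampler(samples = samples, log_posterior = log_density,
                           data = NULL, lb = lb, ub = ub,
                           method = "normal", silent = TRUE)
b_warp3 <- bridge_sampler(samples = samples, log_posterior = log_density,
                          data = NULL, lb = lb, ub = ub,
                          method = "warp3", silent = TRUE)
c(normal = b_normal$logml, warp3 = b_warp3$logml)  # both close to log(2*pi)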
Note that for the matrix method, the lower and upper bound of a
parameter cannot be a function of the bounds of another parameter.
Furthermore, constraints that depend on multiple parameters of the model are
not supported. This usually excludes, for example, parameters that
constitute a covariance matrix or sets of parameters that need to sum to
one.
However, if the retransformations are part of the model itself, i.e., if
log_posterior accepts parameters on the real line and performs the
appropriate Jacobian adjustments, as is done for stanfit and
stanreg objects, such constraints are possible (we currently do not
know of any parameter type supported within Stan that does not work with the
current implementation through a stanfit object).
Parallel Computation
On unix-like systems forking is used via
mclapply. Hence elements needed for evaluation of
log_posterior should be in the .GlobalEnv.
On other OSes (e.g., Windows), things are more complicated. For normal
parallel computation, the log_posterior function can be passed as
either a function or a function name; in the latter case, it needs to exist
in the environment specified in the envir argument. For parallel
computation when using an Rcpp function, log_posterior can only
be passed as the function name (i.e., a character string). This function
needs to result from calling sourceCpp on the file specified in rcppFile.
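A minimal self-contained sketch of such a non-unix setup follows; the conjugate normal model, the object names (post_samples, my_log_post, helper_constants), and the data are invented for this illustration. Because the posterior of this toy model is available in closed form, it can be sampled exactly, and helper_constants is exported to the nodes via varlist:

library(bridgesampling)
set.seed(1)
y <- rnorm(50, 0.3, 1)             # data: y_i ~ N(mu, 1), prior mu ~ N(0, 1)
n <- length(y)
# exact conjugate posterior: mu | y ~ N(sum(y)/(n+1), 1/(n+1))
post_samples <- matrix(rnorm(5000, sum(y)/(n + 1), sqrt(1/(n + 1))),
                       ncol = 1, dimnames = list(NULL, "mu"))
helper_constants <- list(scale = 1)  # needed by the log posterior on each node
my_log_post <- function(pars, data) {
  sum(dnorm(data$y, pars["mu"], helper_constants$scale, log = TRUE)) +
    dnorm(pars["mu"], 0, 1, log = TRUE)
}
bridge_sampler(samples = post_samples, log_posterior = "my_log_post",
               data = list(y = y), lb = c(mu = -Inf), ub = c(mu = Inf),
               cores = 2,
               varlist = c("my_log_post", "helper_constants"),
               envir = .GlobalEnv, silent = TRUE)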
Due to the way rstan currently works, parallel computations with
stanfit and stanreg objects only work with forking (i.e., NOT
on Windows).
Note
To be able to use a stanreg object for samples, the user
crucially needs to have specified the diagnostic_file when fitting
the model in rstanarm.
Warning
Note that the results depend strongly on the parameter priors. Therefore, it is strongly advised to think carefully about the priors before calculating marginal likelihoods. For example, the prior choices implemented in rstanarm or brms might not be optimal from a testing point of view. We recommend using priors chosen from a testing rather than a purely estimation perspective.
Also note that for testing, the number of posterior samples usually needs to be substantially larger than for estimation.
References
Gronau, Q. F., Singmann, H., & Wagenmakers, E.-J. (2020). bridgesampling: An R Package for Estimating Normalizing Constants. Journal of Statistical Software, 92. doi:10.18637/jss.v092.i10
Gronau, Q. F., Sarafoglou, A., Matzke, D., Ly, A., Boehm, U., Marsman, M., Leslie, D. S., Forster, J. J., Wagenmakers, E.-J., & Steingroever, H. (in press). A tutorial on bridge sampling. Journal of Mathematical Psychology. https://arxiv.org/abs/1703.05984. See also vignette("bridgesampling_tutorial").
Gronau, Q. F., Wagenmakers, E.-J., Heck, D. W., & Matzke, D. (2019). A Simple Method for Comparing Complex Models: Bayesian Model Comparison for Hierarchical Multinomial Processing Tree Models Using Warp-III Bridge Sampling. Psychometrika, 84(1), 261–284. doi:10.1007/s11336-018-9648-3
Meng, X.-L., & Wong, W. H. (1996). Simulating ratios of normalizing constants via a simple identity: A theoretical exploration. Statistica Sinica, 6, 831-860. https://www3.stat.sinica.edu.tw/statistica/j6n4/j6n43/j6n43.htm
Meng, X.-L., & Schilling, S. (2002). Warp bridge sampling. Journal of Computational and Graphical Statistics, 11(3), 552-586. doi:10.1198/106186002457
Micaletto, G., & Vehtari, A. (2025). Monte Carlo standard errors for bridge sampling marginal likelihood estimation. arXiv preprint, arXiv:2508.14487. https://arxiv.org/abs/2508.14487
Overstall, A. M., & Forster, J. J. (2010). Default Bayesian model determination methods for generalised linear mixed models. Computational Statistics & Data Analysis, 54, 3269-3288. doi:10.1016/j.csda.2010.03.008
See also
bf allows the user to calculate Bayes factors and
post_prob allows the user to calculate posterior model
probabilities from bridge sampling estimates. bridge-methods
lists some additional methods that automatically invoke the
error_measures function.
Author
Quentin F. Gronau and Henrik Singmann. Parallel computing (i.e.,
cores > 1) and the stanfit method use code from rstan
by Jiaqing Guo, Jonah Gabry, and Ben Goodrich. Ben Goodrich added the
stanreg method. Kees Mulder added methods for simplex and circular
variables. Giorgio Micaletto and Aki Vehtari added the CmdStanMCMC method (for cmdstanr) and calculation of the Monte Carlo Standard Error (MCSE).
Examples
## ------------------------------------------------------------------------
## Example 1: Estimating the Normalizing Constant of a Two-Dimensional
## Standard Normal Distribution
## ------------------------------------------------------------------------
library(bridgesampling)
library(mvtnorm)
samples <- rmvnorm(1e4, mean = rep(0, 2), sigma = diag(2))
colnames(samples) <- c("x1", "x2")
log_density <- function(samples.row, data) {
  -.5 * t(samples.row) %*% samples.row
}
lb <- rep(-Inf, 2)
ub <- rep(Inf, 2)
names(lb) <- names(ub) <- colnames(samples)
bridge_result <- bridge_sampler(samples = samples, log_posterior = log_density,
                                data = NULL, lb = lb, ub = ub, silent = TRUE)
# compare to the analytical value: the normalizing constant of exp(-x'x/2)
# in two dimensions is (2*pi)^(2/2) = 2*pi
analytical <- log(2*pi)
print(cbind(bridge_result$logml, analytical))
#>              analytical
#> [1,] 1.83804   1.837877
if (FALSE) { # \dontrun{
## ------------------------------------------------------------------------
## Example 2: Hierarchical Normal Model
## ------------------------------------------------------------------------
# for a full description of the example, see
vignette("bridgesampling_example_jags")
library(R2jags)
### generate data ###
set.seed(12345)
mu <- 0
tau2 <- 0.5
sigma2 <- 1
n <- 20
theta <- rnorm(n, mu, sqrt(tau2))
y <- rnorm(n, theta, sqrt(sigma2))
### set prior parameters
alpha <- 1
beta <- 1
mu0 <- 0
tau20 <- 1
### functions to get posterior samples ###
### H0: mu = 0
getSamplesModelH0 <- function(data, niter = 52000, nburnin = 2000, nchains = 3) {
  model <- "
  model {
    for (i in 1:n) {
      theta[i] ~ dnorm(0, invTau2)
      y[i] ~ dnorm(theta[i], 1/sigma2)
    }
    invTau2 ~ dgamma(alpha, beta)
    tau2 <- 1/invTau2
  }"
  s <- jags(data, parameters.to.save = c("theta", "invTau2"),
            model.file = textConnection(model),
            n.chains = nchains, n.iter = niter,
            n.burnin = nburnin, n.thin = 1)
  return(s)
}
### H1: mu != 0
getSamplesModelH1 <- function(data, niter = 52000, nburnin = 2000,
                              nchains = 3) {
  model <- "
  model {
    for (i in 1:n) {
      theta[i] ~ dnorm(mu, invTau2)
      y[i] ~ dnorm(theta[i], 1/sigma2)
    }
    mu ~ dnorm(mu0, 1/tau20)
    invTau2 ~ dgamma(alpha, beta)
    tau2 <- 1/invTau2
  }"
  s <- jags(data, parameters.to.save = c("theta", "mu", "invTau2"),
            model.file = textConnection(model),
            n.chains = nchains, n.iter = niter,
            n.burnin = nburnin, n.thin = 1)
  return(s)
}
### get posterior samples ###
# create data lists for Jags
data_H0 <- list(y = y, n = length(y), alpha = alpha, beta = beta, sigma2 = sigma2)
data_H1 <- list(y = y, n = length(y), mu0 = mu0, tau20 = tau20, alpha = alpha,
                beta = beta, sigma2 = sigma2)
# fit models
samples_H0 <- getSamplesModelH0(data_H0)
samples_H1 <- getSamplesModelH1(data_H1)
### functions for evaluating the unnormalized posteriors on log scale ###
log_posterior_H0 <- function(samples.row, data) {
  mu <- 0
  invTau2 <- samples.row[[ "invTau2" ]]
  theta <- samples.row[ paste0("theta[", seq_along(data$y), "]") ]
  # note: dnorm() takes the standard deviation, hence sqrt(sigma2) and
  # 1/sqrt(invTau2) (sigma2 and invTau2 are a variance and a precision)
  sum(dnorm(data$y, theta, sqrt(data$sigma2), log = TRUE)) +
    sum(dnorm(theta, mu, 1/sqrt(invTau2), log = TRUE)) +
    dgamma(invTau2, data$alpha, data$beta, log = TRUE)
}
log_posterior_H1 <- function(samples.row, data) {
  mu <- samples.row[[ "mu" ]]
  invTau2 <- samples.row[[ "invTau2" ]]
  theta <- samples.row[ paste0("theta[", seq_along(data$y), "]") ]
  sum(dnorm(data$y, theta, sqrt(data$sigma2), log = TRUE)) +
    sum(dnorm(theta, mu, 1/sqrt(invTau2), log = TRUE)) +
    dnorm(mu, data$mu0, sqrt(data$tau20), log = TRUE) +
    dgamma(invTau2, data$alpha, data$beta, log = TRUE)
}
# specify parameter bounds H0
cn <- colnames(samples_H0$BUGSoutput$sims.matrix)
cn <- cn[cn != "deviance"]
lb_H0 <- rep(-Inf, length(cn))
ub_H0 <- rep(Inf, length(cn))
names(lb_H0) <- names(ub_H0) <- cn
lb_H0[[ "invTau2" ]] <- 0
# specify parameter bounds H1
cn <- colnames(samples_H1$BUGSoutput$sims.matrix)
cn <- cn[cn != "deviance"]
lb_H1 <- rep(-Inf, length(cn))
ub_H1 <- rep(Inf, length(cn))
names(lb_H1) <- names(ub_H1) <- cn
lb_H1[[ "invTau2" ]] <- 0
# compute log marginal likelihood via bridge sampling for H0
H0.bridge <- bridge_sampler(samples = samples_H0, data = data_H0,
                            log_posterior = log_posterior_H0, lb = lb_H0,
                            ub = ub_H0, silent = TRUE)
print(H0.bridge)
# compute log marginal likelihood via bridge sampling for H1
H1.bridge <- bridge_sampler(samples = samples_H1, data = data_H1,
                            log_posterior = log_posterior_H1, lb = lb_H1,
                            ub = ub_H1, silent = TRUE)
print(H1.bridge)
# compute percentage error
print(error_measures(H0.bridge)$percentage)
print(error_measures(H1.bridge)$percentage)
# compute Bayes factor
BF01 <- bf(H0.bridge, H1.bridge)
print(BF01)
# compute posterior model probabilities (assuming equal prior model probabilities)
post1 <- post_prob(H0.bridge, H1.bridge)
print(post1)
# compute posterior model probabilities (using user-specified prior model probabilities)
post2 <- post_prob(H0.bridge, H1.bridge, prior_prob = c(.6, .4))
print(post2)
} # }
if (FALSE) { # \dontrun{
## ------------------------------------------------------------------------
## Example 3: rstanarm
## ------------------------------------------------------------------------
library(rstanarm)
# N.B.: remember to specify the diagnostic_file
fit_1 <- stan_glm(mpg ~ wt + qsec + am, data = mtcars,
                  chains = 2, cores = 2, iter = 5000,
                  diagnostic_file = file.path(tempdir(), "df.csv"))
bridge_1 <- bridge_sampler(fit_1)
fit_2 <- update(fit_1, formula = . ~ . + cyl)
bridge_2 <- bridge_sampler(fit_2, method = "warp3")
bf(bridge_1, bridge_2)
} # }