ELBO, convergence, and model selection
Gao Wang
2026-05-17
Source:vignettes/elbo_and_convergence.Rmd
elbo_and_convergence.RmdThis vignette covers the evidence lower bound (ELBO) used by mvSuSiE for model fitting and convergence monitoring, and compares the EM and optim algorithms for prior variance estimation.
data(simdata)
X <- simdata$raw$X
n <- nrow(X); p <- ncol(X); r <- 10
causal <- sort(sample(p, 3))
B <- matrix(0, p, r)
for (j in causal) B[j, ] <- rnorm(r, 0, 0.5)
Y <- X %*% B + matrix(rnorm(n * r), n, r)ELBO and convergence
mvSuSiE uses the Iterative Bayesian Stepwise Selection (IBSS) algorithm to maximize the evidence lower bound (ELBO). The ELBO is a lower bound on the log marginal likelihood and should increase monotonically at each iteration.
prior <- create_mixture_prior(R = r)
fit <- mvsusie(X, Y, L = 10,
prior_variance = prior,
tol = 1e-4,
verbose = FALSE)
elbo <- fit$elbo
cat("Iterations:", length(elbo), "\n")
cat("Final ELBO:", tail(elbo, 1), "\n")
pdat <- data.frame(iter = seq_along(elbo), elbo = elbo)
ggplot(pdat, aes(x = iter, y = elbo)) +
geom_line(color = "royalblue") +
geom_point(color = "royalblue", size = 2) +
labs(x = "Iteration", y = "ELBO") +
theme_cowplot(font_size = 12)
# Iterations: 4
# Final ELBO: -8294.175
Effect pruning via log Bayes factors
mvSuSiE fits a model with L single
effects. Effects that do not contribute to the model have small or
negative log Bayes factors (LBF) and are automatically pruned when
estimate_prior_variance = TRUE. This makes the model robust
to over-specification of L.
cat("L =", length(fit$lbf), "single effects\n")
cat("Log Bayes factors:\n")
for (l in seq_along(fit$lbf)) {
pruned <- if (fit$V[l] < 1e-9) " [pruned]" else ""
cat(sprintf(" Effect %d: lbf = %8.2f, V = %.4e%s\n",
l, fit$lbf[l], fit$V[l], pruned))
}
# L = 10 single effects
# Log Bayes factors:
# Effect 1: lbf = 178.51, V = 5.8701e-02
# Effect 2: lbf = 105.96, V = 3.6540e-02
# Effect 3: lbf = 49.91, V = 2.0270e-02
# Effect 4: lbf = 0.00, V = 0.0000e+00 [pruned]
# Effect 5: lbf = 0.00, V = 0.0000e+00 [pruned]
# Effect 6: lbf = 0.00, V = 0.0000e+00 [pruned]
# Effect 7: lbf = 0.00, V = 0.0000e+00 [pruned]
# Effect 8: lbf = 0.00, V = 0.0000e+00 [pruned]
# Effect 9: lbf = 0.00, V = 0.0000e+00 [pruned]
# Effect 10: lbf = 0.00, V = 0.0000e+00 [pruned]
lbf_dat <- data.frame(effect = seq_along(fit$lbf),
lbf = fit$lbf,
active = fit$V > 1e-9)
ggplot(lbf_dat, aes(x = effect, y = lbf, fill = active)) +
geom_col() +
scale_fill_manual(values = c("gray70", "royalblue"),
labels = c("Pruned", "Active")) +
labs(x = "Single effect", y = "Log Bayes factor", fill = "") +
theme_cowplot(font_size = 12)
EM vs optim for prior variance estimation
The estimate_prior_variance option (default
TRUE) allows mvSuSiE to estimate a scalar multiplier for
the prior. mvSuSiE provides two algorithms for estimating the prior
variance scaling factor: "EM" (expectation-maximization)
and "optim" (Brent optimizer). The optim method typically
converges in fewer iterations because it directly maximizes the ELBO
with respect to the prior scaling factor at each iteration.
fit_em <- mvsusie(X, Y, L = 10,
prior_variance = prior,
estimate_prior_method = "EM",
tol = 1e-4, verbose = FALSE)
# WARNING: IBSS algorithm did not converge in 100 iterations!
# Warning: IBSS algorithm did not converge in 100 iterations!
fit_optim <- mvsusie(X, Y, L = 10,
prior_variance = prior,
estimate_prior_method = "optim",
tol = 1e-4, verbose = FALSE)
cat("EM: ", length(fit_em$elbo), "iterations, final ELBO =",
tail(fit_em$elbo, 1), "\n")
cat("optim:", length(fit_optim$elbo), "iterations, final ELBO =",
tail(fit_optim$elbo, 1), "\n")
# EM: 100 iterations, final ELBO = -8298.009
# optim: 4 iterations, final ELBO = -8294.175
max_iter <- max(length(fit_em$elbo), length(fit_optim$elbo))
pdat <- data.frame(
iter = c(seq_along(fit_em$elbo), seq_along(fit_optim$elbo)),
elbo = c(fit_em$elbo, fit_optim$elbo),
method = c(rep("EM", length(fit_em$elbo)),
rep("optim", length(fit_optim$elbo)))
)
ggplot(pdat, aes(x = iter, y = elbo, color = method)) +
geom_line(linewidth = 0.8) +
geom_point(size = 2) +
scale_color_manual(values = c("EM" = "royalblue", "optim" = "firebrick")) +
labs(x = "Iteration", y = "ELBO", color = "Method") +
theme_cowplot(font_size = 12)