The lmmprobe package implements a partitioned Empirical
Bayes ECM algorithm for sparse high-dimensional linear mixed modeling.
This vignette demonstrates how to simulate data and run the
lmmprobe function.
We generate a small synthetic dataset with random intercepts.
set.seed(123)
n <- 50 # Number of subjects
m <- 4 # Observations per subject
p <- 20 # Number of high-dimensional predictors (Z)
sigma_e <- 1.5 # Residual standard deviation
sigma_b <- 1.0 # Random intercept standard deviation
# Subject IDs
ID <- rep(1:n, each = m)
N <- n * m
# High-dimensional predictors (Z) - simple random normal
Z <- matrix(rnorm(N * p), ncol = p)
colnames(Z) <- paste0("z", 1:p)
# Sparse coefficients for Z
beta_true <- rep(0, p)
beta_true[1:3] <- c(0.5, -0.5, 0.3) # First 3 are active
# Random Intercepts
b <- rnorm(n, 0, sigma_b)
b_vec <- rep(b, each = m)
# Response variable Y
# Y = Z*beta + b + e
Y <- Z %*% beta_true + b_vec + rnorm(N, 0, sigma_e)
# Fixed effect covariates (V)
# For scenario 1 (random intercept), V is a 1-column matrix of IDs.
V <- matrix(ID, nrow = N, ncol = 1)# Preparing inputs
# Y is vector/matrix
# Z is matrix of high-dim predictors
# ID_data is vector of IDs
# V is matrix of IDs (for random intercept)
fit <- lmmprobe(
Y = matrix(Y, ncol = 1),
Z = Z,
V = V,
ID_data = ID,
Y_test = matrix(Y, ncol = 1),
Z_test = Z,
V_test = V,
ID_test = ID,
alpha = 0.05,
maxit = 10, # Short run for vignette
ep = 0.5,
cpus = 1, # Serial for vignette
B = 3,
adj = 1,
LR = 0,
C = 1,
sigma_init = NULL
)
#> Converged
#> Iteration=3 Convergence Crit=0.335
# Inspect results
print(fit$c_coefs)
#> [,1]
#> [1,] -0.2033882
#> [2,] 1.3793191
#> [3,] 0.5922036
print(head(fit$beta))
#> [1] 0.50048486 -0.45182165 0.37168600 -0.03933587 -0.07701726 -0.05637764