Getting Started with rMIDAS2

Overview

rMIDAS2 is a lightweight R client for the MIDAS2 Python package. It lets you perform multiple imputation using denoising autoencoders without using reticulate at runtime.

1. A small executable example

The chunk below runs during vignette build. It uses mocked HTTP responses, so it exercises the package interface without requiring a live Python backend.

# Build a mock request handler that always answers with the same JSON payload.
#
# body:   R object to serialise with jsonlite::toJSON(auto_unbox = TRUE).
# status: HTTP status code reported by the mocked response (default 200L).
#
# Returns a one-argument function suitable for httr2::with_mocked_responses();
# the request it receives is ignored.
mock_json_response <- function(body, status = 200L) {
  function(req) {
    # Serialise inside the handler so `body` is only evaluated when a request
    # is actually answered.
    json_text <- jsonlite::toJSON(body, auto_unbox = TRUE)
    httr2::response(
      status_code = status,
      headers = list("Content-Type" = "application/json"),
      body = charToRaw(json_text)
    )
  }
}

# Temporarily replace the package's internal connection state with fake
# values, presumably so midas() treats a server as already running and sends
# its requests to the mocked URL instead of starting a real backend.
# NOTE(review): `:::` reaches into a non-exported object; confirm this is
# only done here because the vignette belongs to the package itself.
pkg_env <- rMIDAS2:::.pkg_env
# Remember the current values so they can be put back afterwards.
old_process <- pkg_env$process
old_port <- pkg_env$port
old_base_url <- pkg_env$base_url
# NOTE(review): on.exit() is registered at top level rather than inside a
# function; confirm the evaluation context runs this restore only after the
# mocked call below has finished.
on.exit({
  pkg_env$process <- old_process
  pkg_env$port <- old_port
  pkg_env$base_url <- old_base_url
}, add = TRUE)

# Stand-in process object: it only provides is_alive(), which always
# returns TRUE.
pkg_env$process <- list(is_alive = function() TRUE)
# Port and base URL are kept consistent with each other (both use 9999).
pkg_env$port <- 9999L
pkg_env$base_url <- "http://127.0.0.1:9999"

# Tiny three-row data set for the mocked run: Y and X2 are fully observed,
# X1 has a single missing value in its first row.
example_data <- data.frame(
  Y  = c(1.2, -0.4, 0.7),
  X1 = c(NA_real_, 0.5, -1.1),
  X2 = c(0.3, 1.4, -0.2)
)

# Canned response returned for the mocked midas() call.
#   model_id    - identifier later read back as result$model_id
#   m           - number of imputed datasets (two are listed below)
#   columns     - column names, in the order used by every row
#   imputations - one entry per imputed dataset; each dataset is a list of
#                 rows, each row a list of values in `columns` order
# The two datasets differ only in X1 of the first row (0.1 vs 0.2), which is
# the one cell that is NA in example_data.
mock_complete <- mock_json_response(list(
  model_id = "mod-001",
  m = 2,
  columns = list("Y", "X1", "X2"),
  imputations = list(
    list(list(1.2, 0.1, 0.3), list(-0.4, 0.5, 1.4), list(0.7, -1.1, -0.2)),
    list(list(1.2, 0.2, 0.3), list(-0.4, 0.5, 1.4), list(0.7, -1.1, -0.2))
  )
))

# Run midas() while every httr2 request is answered by mock_complete, so no
# real HTTP traffic is needed for this call.
result <- httr2::with_mocked_responses(mock_complete, {
  midas(example_data, m = 2, epochs = 1)
})
#> Training MIDAS model (1 epochs)...
#> Training complete.

# Model identifier taken from the mocked response.
result$model_id
#> [1] "mod-001"
# First imputed dataset; X1 in row 1 (0.1) fills the cell that was NA in
# example_data.
head(result$imputations[[1]])
#>      Y   X1   X2
#> 1  1.2  0.1  0.3
#> 2 -0.4  0.5  1.4
#> 3  0.7 -1.1 -0.2

2. Install the Python backend

# Install the Python backend, using pip as the installation method.
# NOTE(review): presumably a one-time setup step that needs a working Python
# and network access; confirm in ?install_backend.
install_backend(method = "pip")

3. Create data with missing values

# Simulate 500 rows of three independent standard-normal columns. The seed is
# fixed so the printed rows are reproducible.
set.seed(42)
n <- 500
df <- data.frame(Y = rnorm(n), X1 = rnorm(n), X2 = rnorm(n))

# Blank out X1 wherever X2 exceeds 0.5, so missingness in X1 depends on X2.
df[df$X2 > 0.5, "X1"] <- NA

head(df)
#>            Y         X1         X2
#> 1  1.3709584         NA  2.3250585
#> 2 -0.5646982         NA  0.5241222
#> 3  0.3631284         NA  0.9707334
#> 4  0.6328626  0.1360096  0.3769734
#> 5  0.4042683 -0.7201535 -0.9959334
#> 6 -0.1061245 -0.1981243 -0.5974829

4. Fit and impute (all-in-one)

# Fit the model and produce the imputations in one call: 20 training epochs,
# m = 10 imputed datasets.
# NOTE(review): unlike the mocked example, this presumably needs the Python
# backend to be installed and reachable.
result <- midas(df, m = 10, epochs = 20)

# View first imputed dataset
head(result$imputations[[1]])

5. Step-by-step workflow

# Fit the model
fit <- midas_fit(df, epochs = 20)

# Generate imputations — pass the fitted object directly
imps <- midas_transform(fit, m = 10)

head(imps[[1]])

6. Mean imputation

# Mean imputation from the fitted model.
# NOTE(review): presumably a single dataset averaged over several
# imputations; confirm the exact definition in ?imp_mean.
mean_df <- imp_mean(fit)
head(mean_df)

7. Rubin’s rules regression

# Regression pooled with Rubin's rules (per the section heading).
# NOTE(review): y = "Y" presumably names the outcome column, with the
# remaining columns as predictors; confirm in ?combine.
reg <- combine(fit, y = "Y")
# Print the pooled result.
reg

8. Overimputation diagnostic

# Overimputation diagnostic on the fitted model.
# NOTE(review): mask_frac = 0.1 presumably hides 10% of the observed values
# and measures how well they are recovered; confirm in ?overimpute.
# NOTE(review): the variable name `diag` is also the name of base::diag();
# calls to diag() still resolve to the function, but a distinct name would
# be clearer.
diag <- overimpute(fit, mask_frac = 0.1)
# The two RMSE fields read from the diagnostic result.
diag$mean_rmse
diag$rmse

9. Stopping the server

# Stop the backend server once the session's work is done.
stop_server()

mirror server hosted at Truenetwork, Russian Federation.