Many national household surveys use rotating panel designs, where a sample of respondents is interviewed in an initial wave (implantation) and then followed up over successive periods. Uruguay’s ECH, for example, interviews each household once and then conducts monthly follow-ups for the rest of the year.
metasurvey provides two classes for this type of design:
- RotativePanelSurvey – a panel with an implantation survey and a list of follow-up surveys
- PoolSurvey – a collection of surveys grouped together for combined estimation across periods

A RotativePanelSurvey requires an implantation Survey and one or more follow-up Survey objects.
library(metasurvey)
library(data.table)
set_use_copy(TRUE)
set.seed(42)
n <- 100
# Build a simulated Survey for the examples in this document.
#
# Draws one synthetic respondent per row (id, age, income, employment flag
# and a weight column `w`) and wraps the table in a Survey object of type
# "ech" that uses `w` as its annual weight.
#
# The number of rows is taken from `n` in the enclosing environment; it is
# not an argument of this function.
#
# Args:
#   edition: edition label passed through to Survey$new().
#
# Returns:
#   The object created by Survey$new().
make_survey <- function(edition) {
  dt <- data.table(
    # seq_len() stays empty when n is 0, whereas 1:n would yield c(1, 0)
    id = seq_len(n),
    age = sample(18:80, n, replace = TRUE),
    income = round(runif(n, 5000, 80000)),
    employed = sample(0:1, n, replace = TRUE),
    w = round(runif(n, 0.5, 3.0), 4)
  )
  Survey$new(
    data = dt, edition = edition, type = "ech",
    psu = NULL, engine = "data.table",
    weight = add_weight(annual = "w")
  )
}
# Implantation: 2023 wave 1
impl <- make_survey("2023")
# Follow-ups: waves 2 through 4
fu_2 <- make_survey("2023")
fu_3 <- make_survey("2023")
fu_4 <- make_survey("2023")
panel <- RotativePanelSurvey$new(
implantation = impl,
follow_up = list(fu_2, fu_3, fu_4),
type = "ech",
default_engine = "data.table",
steps = list(),
recipes = list(),
workflows = list(),
design = NULL
)
Use get_implantation() and get_follow_up() to retrieve the individual surveys:
Apply transformations to individual panel components. The same step functions work on both the implantation and follow-up surveys:
# Transform the implantation survey
panel$implantation <- step_compute(panel$implantation,
income_k = income / 1000,
comment = "Income in thousands"
)
# Apply the same step to each follow-up
panel$follow_up <- lapply(panel$follow_up, function(svy) {
step_compute(svy, income_k = income / 1000, comment = "Income in thousands")
})
Use workflow() on individual panel components to perform cross-sectional or time-series analysis.
# Cross-sectional estimate: mean income on the implantation survey only,
# requested with the "annual" estimation type.
result_impl <- workflow(
list(panel$implantation),
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
# Print the resulting table of estimates
result_impl
#> stat value se cv confint_lower
#> <char> <num> <num> <num> <num>
#> 1: survey::svymean: income 43742.45 2420.706 0.05533997 38997.96
#> confint_upper
#> <num>
#> 1: 48486.95
results <- rbindlist(lapply(seq_along(panel$follow_up), function(i) {
r <- workflow(
list(panel$follow_up[[i]]),
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
r$period <- panel$follow_up[[i]]$edition
r
}))
results[, .(period, stat, value, se, cv)]
#> period stat value se cv
#> <num> <char> <num> <num> <num>
#> 1: 2023 survey::svymean: income 41537.99 2328.715 0.05606230
#> 2: 2023 survey::svymean: income 42809.39 2383.860 0.05568546
#> 3: 2023 survey::svymean: income 41314.46 2232.803 0.05404409
A PoolSurvey groups multiple surveys for combined estimation. This is useful when you want to aggregate monthly data into quarterly or annual estimates, or when combining surveys reduces sampling variability.
The constructor takes a nested list:
list(estimation_type = list(group = list(surveys))).
# Three simulated surveys, all labelled with edition "2023"
s1 <- make_survey("2023")
s2 <- make_survey("2023")
s3 <- make_survey("2023")
# Nested list: estimation type "annual" -> group "q1" -> the surveys in it
pool <- PoolSurvey$new(
list(annual = list("q1" = list(s1, s2, s3)))
)
# Inspect the class of the pooled object
class(pool)
#> [1] "PoolSurvey" "R6"
pool_result <- workflow(
pool,
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
pool_result
#> stat value se cv confint_lower
#> <char> <num> <num> <num> <num>
#> 1: survey::svymean: income 44574.23 2226.844 0.04995811 40209.69
#> 2: survey::svymean: income 44341.53 2407.912 0.05430377 39622.10
#> 3: survey::svymean: income 41293.92 2239.968 0.05424451 36903.66
#> confint_upper period type variance
#> <num> <num> <char> <num>
#> 1: 48938.76 2023 q1 4958836
#> 2: 49060.95 2023 q1 5798041
#> 3: 45684.18 2023 q1 5017458
Surveys can be organized into multiple groups:
# Three more simulated surveys to form a second group
s4 <- make_survey("2023")
s5 <- make_survey("2023")
s6 <- make_survey("2023")
# Two named groups ("q1" and "q2") under the "annual" estimation type;
# s1, s2 and s3 are the surveys created for the single-group pool earlier
pool_semester <- PoolSurvey$new(
list(annual = list(
"q1" = list(s1, s2, s3),
"q2" = list(s4, s5, s6)
))
)
# Mean income for the pooled surveys, requested with the "annual" type
result_semester <- workflow(
pool_semester,
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
# Print the resulting table of estimates
result_semester
#> stat value se cv confint_lower
#> <char> <num> <num> <num> <num>
#> 1: survey::svymean: income 44574.23 2226.844 0.04995811 40209.69
#> 2: survey::svymean: income 44341.53 2407.912 0.05430377 39622.10
#> 3: survey::svymean: income 41293.92 2239.968 0.05424451 36903.66
#> 4: survey::svymean: income 41760.06 2296.170 0.05498485 37259.64
#> 5: survey::svymean: income 48051.67 2374.145 0.04940816 43398.43
#> 6: survey::svymean: income 42721.21 2276.767 0.05329359 38258.83
#> confint_upper period type variance
#> <num> <num> <char> <num>
#> 1: 48938.76 2023 q1 4958836
#> 2: 49060.95 2023 q1 5798041
#> 3: 45684.18 2023 q1 5017458
#> 4: 46260.47 2023 q2 5272399
#> 5: 52704.90 2023 q2 5636562
#> 6: 47183.59 2023 q2 5183666
Use extract_surveys() to select specific periods from a RotativePanelSurvey:
metasurvey provides utilities for working with survey edition dates:
# Extract periodicity from edition strings
extract_time_pattern("2023")
#> $year
#> [1] 2023
#>
#> $periodicity
#> [1] "Annual"
extract_time_pattern("2023-06")
#> $year
#> [1] 2023
#>
#> $month
#> [1] 6
#>
#> $periodicity
#> [1] "Monthly"
# Validate edition format
validate_time_pattern(svy_type = "ech", svy_edition = "2023")
#> $svy_type
#> [1] "ech"
#>
#> $svy_edition
#> [1] 2023
#>
#> $svy_periodicity
#> [1] "Annual"
# Group dates by period
dates <- as.Date(c(
"2023-01-15", "2023-03-20", "2023-06-10",
"2023-09-05", "2023-11-30"
))
group_dates(dates, type = "quarterly")
#> 2023-01-15 2023-03-20 2023-06-10 2023-09-05 2023-11-30
#> 1 1 2 3 4
group_dates(dates, type = "biannual")
#> 2023-01-15 2023-03-20 2023-06-10 2023-09-05 2023-11-30
#> 1 1 1 2 2
In practice, panel data is loaded from files using load_panel_survey():
panel <- load_panel_survey(
path_implantation = "data/ECH_implantacion_2023.csv",
path_follow_up = "data/seguimiento/",
svy_type = "ech",
svy_weight_implantation = add_weight(annual = "pesoano"),
svy_weight_follow_up = add_weight(monthly = "pesomes")
)
# Access components
imp <- get_implantation(panel)
fups <- get_follow_up(panel)
For surveys that provide bootstrap replicate weights (such as the ECH), use add_replicate() inside add_weight() to configure robust variance estimation:
panel <- load_panel_survey(
path_implantation = "data/ECH_implantacion_2023.csv",
path_follow_up = "data/seguimiento/",
svy_type = "ech",
svy_weight_implantation = add_weight(
annual = add_replicate(
weight = "pesoano",
replicate_pattern = "wr\\d+",
replicate_path = "data/pesos_replicados_anual.csv",
replicate_id = c("numero" = "numero"),
replicate_type = "bootstrap"
)
),
svy_weight_follow_up = add_weight(monthly = "pesomes")
)
When replicate weights are configured, workflow() automatically uses survey::svrepdesign() for variance estimation instead of the standard Taylor linearization approach.
workflow() and
RecipeWorkflow